bamfcsv 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ *.o
2
+ *.bundle
3
+ *.dSYM
4
+ a.out
5
+ *.so
6
+ *~
7
+ \#*
8
+ *.csv*
9
+ !spec/fixtures/*.csv*
10
+ Makefile
11
+ pkg
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format Fuubar
2
+ --colour
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-HTTP endpoint can be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in bamfcsv.gemspec
gemspec
@@ -0,0 +1,35 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ bamfcsv (0.0.1)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ chalofa_ruby-progressbar (0.0.9.1)
10
+ diff-lcs (1.1.2)
11
+ fuubar (0.0.4)
12
+ chalofa_ruby-progressbar (~> 0.0.9)
13
+ rspec (~> 2.0)
14
+ rspec-instafail (~> 0.1.4)
15
+ rake (0.8.7)
16
+ rake-compiler (0.7.6)
17
+ rake
18
+ rspec (2.5.0)
19
+ rspec-core (~> 2.5.0)
20
+ rspec-expectations (~> 2.5.0)
21
+ rspec-mocks (~> 2.5.0)
22
+ rspec-core (2.5.1)
23
+ rspec-expectations (2.5.0)
24
+ diff-lcs (~> 1.1.2)
25
+ rspec-instafail (0.1.7)
26
+ rspec-mocks (2.5.0)
27
+
28
+ PLATFORMS
29
+ ruby
30
+
31
+ DEPENDENCIES
32
+ bamfcsv!
33
+ fuubar (~> 0.0.2)
34
+ rake-compiler (~> 0.7.1)
35
+ rspec (~> 2.5.0)
data/README ADDED
@@ -0,0 +1,3 @@
1
+ BAMF,CSV,parser
2
+ OHHH,YEAH,there,are,so
3
+ MANy,cahrs,in,here.
@@ -0,0 +1,4 @@
1
require 'bundler'

# Install Bundler's gem-packaging rake tasks for this project.
Bundler::GemHelper.install_tasks

# Load every project task file, in alphabetical order so the load order is
# the same on every machine.
Dir.glob('tasks/*.rake').sort.each { |rake_file| load rake_file }
@@ -0,0 +1,26 @@
1
# -*- encoding: utf-8 -*-
# Make lib/ loadable so the version constant can be read without installing.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "bamfcsv/version"

# Gem specification for bamfcsv. The file lists are taken from git, so only
# tracked files are packaged.
Gem::Specification.new do |spec|
  spec.name        = "bamfcsv"
  spec.version     = BAMFCSV::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Jon Distad", "Alex Redington"]
  spec.email       = ["jon@thinkrelevance.com", "lovemachine@thinkrelevance.com"]
  spec.homepage    = "https://github.com/jondistad/bamfcsv"
  spec.summary     = "BAMF!!! Your csv is parsed."
  spec.description = "BAMFCSV parses csv like a BAMF. BAMF!!"

  spec.rubyforge_project = "bamfcsv"

  tracked_files     = `git ls-files`.split("\n")
  tracked_tests     = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_binaries  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  spec.executables   = tracked_binaries.map { |path| File.basename(path) }
  spec.require_paths = ["lib", "ext"]
  # The native extension is compiled at install time from this extconf.
  spec.extensions    = ["ext/bamfcsv/extconf.rb"]

  spec.add_development_dependency "rspec", "~> 2.5.0"
  spec.add_development_dependency "fuubar", "~> 0.0.2"
  spec.add_development_dependency "rake-compiler", "~> 0.7.1"
end
@@ -0,0 +1,191 @@
1
/* ruby.h comes first so its configuration macros apply to the system headers. */
#include <ruby/ruby.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

#include "bamfcsv_ext.h"
6
+
7
+ struct s_Row *alloc_row() {
8
+
9
+ struct s_Row *new_row = malloc(sizeof(struct s_Row));
10
+
11
+ new_row -> first_cell = 0;
12
+ new_row -> next_row = 0;
13
+ new_row -> cell_count = 0;
14
+
15
+ return new_row;
16
+
17
+ }
18
+
19
+ struct s_Cell *alloc_cell() {
20
+
21
+ struct s_Cell *new_cell = malloc(sizeof(struct s_Cell));
22
+
23
+ new_cell -> start = 0;
24
+ new_cell -> len = 0;
25
+ new_cell -> next_cell = 0;
26
+ new_cell -> has_quotes = 0;
27
+
28
+ return new_cell;
29
+
30
+ }
31
+
32
+ void free_cell(struct s_Cell *cell) {
33
+
34
+ if (cell != 0) {
35
+ free_cell(cell->next_cell);
36
+ free(cell);
37
+ }
38
+
39
+ }
40
+
41
+ void free_row(struct s_Row *row) {
42
+
43
+ if (row != 0) {
44
+
45
+ free_row(row->next_row);
46
+ free_cell(row->first_cell);
47
+ free(row);
48
+
49
+ }
50
+
51
+ }
52
+
53
+ VALUE build_matrix_from_pointer_tree(struct s_Row *first_row, int num_rows) {
54
+ VALUE matrix;
55
+ VALUE row;
56
+ VALUE new_string;
57
+ int i,j;
58
+ struct s_Row *cur_row;
59
+ struct s_Cell *cur_cell;
60
+
61
+ cur_row = first_row;
62
+ matrix = rb_ary_new2(num_rows);
63
+
64
+ ID gsub = rb_intern("gsub!");
65
+ VALUE dquote = rb_str_new2("\"\""), quote = rb_str_new2("\"");
66
+
67
+ for (i = 0; i < num_rows; i++) {
68
+
69
+ cur_cell = cur_row->first_cell;
70
+ row = rb_ary_new2(cur_row->cell_count);
71
+ rb_ary_store(matrix,i,row);
72
+ for (j = 0; j < cur_row->cell_count; j++) {
73
+ if (*(cur_cell->start) == '"'
74
+ && *((cur_cell->start)+((cur_cell->len-1)*sizeof(char))) == '"')
75
+ new_string = rb_str_new(cur_cell->start+sizeof(char), cur_cell->len-(sizeof(char)*2));
76
+ else
77
+ new_string = rb_str_new(cur_cell->start, cur_cell->len);
78
+ if (cur_cell->has_quotes) {
79
+ rb_funcall(new_string, gsub, 2, dquote, quote);
80
+ }
81
+ rb_ary_store(row, j, new_string);
82
+ cur_cell = cur_cell->next_cell;
83
+ }
84
+ cur_row = cur_row->next_row;
85
+ }
86
+
87
+ return matrix;
88
+ }
89
+
90
+ void finalize_cell(struct s_Cell *cell, char *cur) {
91
+ if (*(cur-sizeof(char)) == '\r')
92
+ cell->len = cur-(cell->start)-sizeof(char);
93
+ else
94
+ cell->len = cur-(cell->start);
95
+ }
96
+
97
+ VALUE build_matrix(char *buf, int bufsize) {
98
+ int str_start = 0;
99
+ int num_rows = 1;
100
+ int in_quote = 0;
101
+
102
+ struct s_Row *first_row = alloc_row();
103
+ struct s_Row *cur_row = first_row;
104
+ struct s_Cell *cur_cell = alloc_cell();
105
+ first_row->first_cell = cur_cell;
106
+ cur_cell->start = buf;
107
+
108
+ VALUE matrix;
109
+
110
+ char *cur;
111
+
112
+ for (cur = buf; cur < buf+bufsize; cur++) {
113
+
114
+ if (*cur == '"') {
115
+ if (in_quote)
116
+ if (*(cur+1) != ',')
117
+ cur_cell->has_quotes = 1;
118
+ in_quote = !in_quote;
119
+ }
120
+
121
+ if (!in_quote) {
122
+
123
+ if (*cur == ',') {
124
+
125
+ finalize_cell(cur_cell,cur);
126
+ cur_cell->next_cell = alloc_cell();
127
+ cur_cell = cur_cell->next_cell;
128
+ cur_cell->start = cur+sizeof(char);
129
+ cur_row->cell_count += 1;
130
+
131
+ }
132
+
133
+ if (*cur == '\n') {
134
+
135
+ finalize_cell(cur_cell,cur);
136
+ cur_row->cell_count += 1;
137
+ cur_row->next_row = alloc_row();
138
+ cur_row = cur_row -> next_row;
139
+ cur_row->first_cell = alloc_cell();
140
+ cur_cell = cur_row->first_cell;
141
+ cur_cell->start = cur+sizeof(char);
142
+
143
+ num_rows++;
144
+
145
+ }
146
+
147
+ }
148
+
149
+ }
150
+
151
+ if (cur_row->cell_count == 0) {
152
+ num_rows--;
153
+ }
154
+
155
+ matrix = build_matrix_from_pointer_tree(first_row, num_rows);
156
+
157
+ free_row(first_row);
158
+
159
+ return matrix;
160
+
161
+ }
162
+
163
+ VALUE mm_parse(const char *file) {
164
+
165
+ char *mmapped_csv;
166
+ int filesize, csv;
167
+
168
+ csv = open(file, O_RDONLY);
169
+ filesize = lseek(csv, 0, SEEK_END);
170
+ mmapped_csv = (char*) mmap(0, filesize, PROT_READ, MAP_SHARED, csv, 0);
171
+
172
+ VALUE matrix = build_matrix(mmapped_csv,filesize);
173
+
174
+ munmap(mmapped_csv, filesize);
175
+ close(csv);
176
+
177
+ return matrix;
178
+ }
179
+
180
+ VALUE read_path(VALUE self, VALUE file) {
181
+
182
+ return mm_parse(RSTRING_PTR(file));
183
+
184
+ }
185
+
186
+ void Init_bamfcsv() {
187
+
188
+ VALUE module = rb_define_module("BAMFCSV");
189
+ rb_define_module_function(module, "read_path", read_path, 1);
190
+
191
+ }
@@ -0,0 +1,19 @@
1
/*
 * Data structures shared by the bamfcsv extension.
 *
 * The guard name avoids a leading underscore followed by a capital letter,
 * which is reserved for the implementation.
 */
#ifndef BAMFCSV_EXT_H
#define BAMFCSV_EXT_H

/* One parsed row: a linked list of cells plus a link to the next row. */
struct s_Row {
    struct s_Cell *first_cell;  /* head of this row's cell list, or NULL */
    struct s_Row *next_row;     /* following row, or NULL */
    int cell_count;             /* number of cells counted for this row */
};

/* One parsed cell: a (start, len) slice plus a link to the next cell. */
struct s_Cell {
    char *start;                /* first byte of the cell's text */
    int len;                    /* number of bytes in the cell */
    int has_quotes;             /* non-zero when doubled quotes need collapsing */
    struct s_Cell *next_cell;   /* following cell in the same row, or NULL */
};

/* Extension entry point, called when the shared object is loaded. */
void Init_bamfcsv(void);

#endif /* BAMFCSV_EXT_H */
@@ -0,0 +1,2 @@
1
# Build configuration for the native extension: writes the Makefile that
# compiles the "bamfcsv" shared object.
require "mkmf"

create_makefile "bamfcsv"
@@ -0,0 +1,13 @@
1
+ require 'bamfcsv/bamfcsv'
2
+
3
module BAMFCSV
  # Parses the CSV file named by +thing_to_read+.
  #
  # @param thing_to_read [String] path of the file to parse
  # @return [Object, nil] whatever +read_path+ returns for the path, or +nil+
  #   when the argument is not a String
  # @raise [Errno::ENOENT] if nothing exists at the path
  # @raise [Errno::EISDIR] if the path names a directory
  def self.read(thing_to_read)
    # Only String paths are handled; anything else falls through to nil.
    return unless String === thing_to_read

    raise Errno::ENOENT, "#{thing_to_read} does not exist" unless File.exist?(thing_to_read)
    raise Errno::EISDIR, "#{thing_to_read} is a directory" if File.directory?(thing_to_read)

    read_path(thing_to_read)
  end
end
@@ -0,0 +1,3 @@
1
module BAMFCSV
  # Version of the gem. Frozen so the shared constant cannot be mutated in
  # place by code that reads it.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1 @@
1
+ BAMF,,CSV
@@ -0,0 +1,3 @@
1
+ this is a semicolon:, ;
2
+ this is a comma:," ,"
3
+ this is a quote:," """
File without changes
@@ -0,0 +1,2 @@
1
+ this is a semicolon:, ;
2
+ this is a comma:," ,"
@@ -0,0 +1,2 @@
1
+ BAMF
2
+ CSV
@@ -0,0 +1,3 @@
1
+ BAMF,CSV,parser
2
+ OHHH,YEAH,there,are,so
3
+ MANy,cahrs,in,here.
@@ -0,0 +1,49 @@
1
require 'spec_helper'

# Behavioural spec for BAMFCSV.read, driven by the files in spec/fixtures.
describe BAMFCSV do
  it "has a read method" do
    BAMFCSV.should respond_to(:read)
  end

  describe ".read" do
    it "is a matrix given a filename" do
      BAMFCSV.read("spec/fixtures/test.csv").should be_instance_of Array
    end

    it "is an empty array passed an empty file" do
      BAMFCSV.read("spec/fixtures/empty.csv").should == []
    end

    it "is a 1xn matrix with a one column csv file" do
      BAMFCSV.read("spec/fixtures/one-column.csv").should == [["BAMF"],["CSV"]]
    end

    it "interprets empty cells correctly" do
      BAMFCSV.read("spec/fixtures/bamf-comma-comma.csv").should == [["BAMF","","CSV"]]
    end

    it "escapes cells that are quoted" do
      BAMFCSV.read("spec/fixtures/escapes.csv").should == [["this is a semicolon:", " ;"],["this is a comma:", " ,"]]
    end

    # Distinct description: two examples with the same name cannot be told
    # apart in the formatter output.
    it "collapses doubled quotes inside quoted cells" do
      BAMFCSV.read("spec/fixtures/double-quotes.csv").should == [["this is a semicolon:", " ;"], ["this is a comma:", " ,"], ["this is a quote:", " \""]]
    end

    it "doesn't create a row when the file terminates with [CR]LF" do
      BAMFCSV.read("spec/fixtures/terminated-with-cr.csv").should == [["a"],["b"]]
    end

    it "raises Errno::ENOENT when the file does not exist" do
      expect do
        BAMFCSV.read("spec/fixtures/this-file-does-not-not-exist.csv")
      end.should raise_error Errno::ENOENT
    end

    it "raises Errno::EISDIR when the file is not a flat file" do
      expect do
        BAMFCSV.read("spec/fixtures/")
      end.should raise_error Errno::EISDIR
    end
  end
end
@@ -0,0 +1,12 @@
1
# Spec bootstrap: puts the project's lib/ and ext/ directories at the front
# of the load path, loads the library and the development gems, and
# configures focused-example filtering.
project_root = File.expand_path('..', File.dirname(__FILE__))

# Unshifting lib first and ext second leaves ext ahead of lib on the path.
%w[lib ext].each do |dir|
  $LOAD_PATH.unshift(File.join(project_root, dir))
end

require 'bundler'
require 'bamfcsv'
Bundler.require(:development)

RSpec.configure do |rspec_config|
  # Examples tagged :focused (or declared with `fit`) run alone; when nothing
  # is tagged, the whole suite runs.
  rspec_config.filter_run :focused => true
  rspec_config.alias_example_to :fit, :focused => true
  rspec_config.run_all_when_everything_filtered = true
end
@@ -0,0 +1,11 @@
1
require "rake/extensiontask"

# Loads and memoizes the gem specification.
#
# Gem::Specification.load evaluates the gemspec with its real file name, so
# paths the gemspec builds from __FILE__ resolve correctly no matter which
# directory rake is started from.
#
# @return [Gem::Specification] the specification from bamfcsv.gemspec
def gemspec
  @clean_gemspec ||= Gem::Specification.load(File.expand_path('../../bamfcsv.gemspec', __FILE__))
end

# Compile the native extension and place the result under lib/bamfcsv.
Rake::ExtensionTask.new("bamfcsv", gemspec) do |ext|
  ext.lib_dir = File.join 'lib', 'bamfcsv'
  # Have `rake clean` remove the compiled shared object as well.
  CLEAN.include "#{ext.lib_dir}/*.#{RbConfig::CONFIG['DLEXT']}"
end

# Declare the prerequisite rather than looking the task up: this file loads
# before the one that defines :spec, and declaring it here works whichever
# file is loaded first.
task :spec => :compile
@@ -0,0 +1,12 @@
1
# Defines the `spec` task and makes it the default. If rspec cannot be
# loaded, a hint is printed and no tasks are defined.
begin
  require 'rspec'
  require 'rspec/core/rake_task'

  RSpec::Core::RakeTask.new('spec') { |spec_task| spec_task.verbose = true }

  task :default => :spec
rescue LoadError
  puts "rspec, or one of its dependencies, is not available. Install it with: sudo gem install rspec"
end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bamfcsv
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Jon Distad
9
+ - Alex Redington
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+
14
+ date: 2011-04-01 00:00:00 -04:00
15
+ default_executable:
16
+ dependencies:
17
+ - !ruby/object:Gem::Dependency
18
+ name: rspec
19
+ prerelease: false
20
+ requirement: &id001 !ruby/object:Gem::Requirement
21
+ none: false
22
+ requirements:
23
+ - - ~>
24
+ - !ruby/object:Gem::Version
25
+ version: 2.5.0
26
+ type: :development
27
+ version_requirements: *id001
28
+ - !ruby/object:Gem::Dependency
29
+ name: fuubar
30
+ prerelease: false
31
+ requirement: &id002 !ruby/object:Gem::Requirement
32
+ none: false
33
+ requirements:
34
+ - - ~>
35
+ - !ruby/object:Gem::Version
36
+ version: 0.0.2
37
+ type: :development
38
+ version_requirements: *id002
39
+ - !ruby/object:Gem::Dependency
40
+ name: rake-compiler
41
+ prerelease: false
42
+ requirement: &id003 !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 0.7.1
48
+ type: :development
49
+ version_requirements: *id003
50
+ description: BAMFCSV parses csv like a BAMF. BAMF!!
51
+ email:
52
+ - jon@thinkrelevance.com
53
+ - lovemachine@thinkrelevance.com
54
+ executables: []
55
+
56
+ extensions:
57
+ - ext/bamfcsv/extconf.rb
58
+ extra_rdoc_files: []
59
+
60
+ files:
61
+ - .gitignore
62
+ - .rspec
63
+ - Gemfile
64
+ - Gemfile.lock
65
+ - README
66
+ - Rakefile
67
+ - bamfcsv.gemspec
68
+ - ext/bamfcsv/bamfcsv_ext.c
69
+ - ext/bamfcsv/bamfcsv_ext.h
70
+ - ext/bamfcsv/extconf.rb
71
+ - lib/bamfcsv.rb
72
+ - lib/bamfcsv/version.rb
73
+ - spec/fixtures/bamf-comma-comma.csv
74
+ - spec/fixtures/double-quotes.csv
75
+ - spec/fixtures/empty.csv
76
+ - spec/fixtures/escapes.csv
77
+ - spec/fixtures/one-column.csv
78
+ - spec/fixtures/terminated-with-cr.csv
79
+ - spec/fixtures/test.csv
80
+ - spec/lib/bamfcsv_spec.rb
81
+ - spec/spec_helper.rb
82
+ - tasks/compile.rake
83
+ - tasks/rspec.rake
84
+ has_rdoc: true
85
+ homepage: https://github.com/jondistad/bamfcsv
86
+ licenses: []
87
+
88
+ post_install_message:
89
+ rdoc_options: []
90
+
91
+ require_paths:
92
+ - lib
93
+ - ext
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: "0"
106
+ requirements: []
107
+
108
+ rubyforge_project: bamfcsv
109
+ rubygems_version: 1.6.2
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: BAMF!!! Your csv is parsed.
113
+ test_files:
114
+ - spec/fixtures/bamf-comma-comma.csv
115
+ - spec/fixtures/double-quotes.csv
116
+ - spec/fixtures/empty.csv
117
+ - spec/fixtures/escapes.csv
118
+ - spec/fixtures/one-column.csv
119
+ - spec/fixtures/terminated-with-cr.csv
120
+ - spec/fixtures/test.csv
121
+ - spec/lib/bamfcsv_spec.rb
122
+ - spec/spec_helper.rb