ccsv 0.1.2 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ccsv might be problematic. Click here for more details.

data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ v1.0.1 Tests moved to minitest (zhum).
2
+
3
+ v1.0.0 Allow custom delimiter and integer range filtration (zhum).
4
+
1
5
  v0.1.2. Allow lines of any length (crossman).
2
6
 
3
7
  v0.1.1 Remove env.h and node.h for compilation for ruby 1.9.3 (pronix).
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ group :test do
2
+ gem 'minitest'
3
+ end
4
+
5
+ group :development do
6
+ gem 'echoe'
7
+ end
@@ -0,0 +1,23 @@
1
+ GEM
2
+ specs:
3
+ allison (2.0.3)
4
+ echoe (4.6.5)
5
+ allison (>= 2.0.3)
6
+ rake (>= 0.9.2)
7
+ rdoc (>= 2.5.11)
8
+ rubyforge (>= 2.0.4)
9
+ json (1.8.1)
10
+ json_pure (1.8.1)
11
+ minitest (5.2.1)
12
+ rake (10.1.0)
13
+ rdoc (4.0.1)
14
+ json (~> 1.4)
15
+ rubyforge (2.0.4)
16
+ json_pure (>= 1.1.7)
17
+
18
+ PLATFORMS
19
+ ruby
20
+
21
+ DEPENDENCIES
22
+ echoe
23
+ minitest
data/Manifest CHANGED
@@ -1,11 +1,12 @@
1
1
  CHANGELOG
2
+ Gemfile
3
+ Gemfile.lock
2
4
  LICENSE
3
5
  Manifest
4
6
  README.rdoc
5
7
  Rakefile
8
+ compile
6
9
  ext/ccsv.c
7
10
  ext/ccsv.h
8
11
  ext/extconf.rb
9
- test/data.csv
10
- test/data_small.csv
11
- test/unit/test_ccsv.rb
12
+ spec/ccsv_spec.rb
@@ -3,13 +3,40 @@ Ccsv
3
3
 
4
4
  A pure-C CSV parser.
5
5
 
6
- for after make changes - add description to CHANGELOG and run rake build
6
+ == Installation
7
7
 
8
- == License
8
+ gem install ccsv
9
+
10
+ == Usage
11
+
12
+ require 'rubygems'
13
+ require 'ccsv'
14
+
15
+ Ccsv.foreach("data.csv") do |line|
16
+ # Do something with the line array
17
+ end
18
+
19
+ # print all logins
20
+ Ccsv.foreach("/etc/passwd",":") do |line|
21
+ puts line[0]
22
+ end
9
23
 
10
- Copyright 2007, 2008 Cloudburst, LLC. Licensed under the AFL 3. See the included LICENSE file.
24
+ == Advanced usage
11
25
 
12
- The public certificate for the gem is here[http://blog.evanweaver.com/files/evan_weaver-original-public_cert.pem].
26
+ Get users from passwd file, with UIDs between 1000 and 1010 and between 2000 and 2010.
27
+ Works with numbers only!
28
+ The third argument is the index of the column to filter on; it is followed by one or more arrays of ranges.
29
+
30
+ Ccsv.foreach("/etc/passwd",":",2,[1000..1010],[2000..2010]) do |line|
31
+ puts line[0]
32
+ end
33
+
34
+ == Contacts
35
+
36
+ Forks, pull-requests and other contacts via GitHub: http://github.com/evan/ccsv/
37
+
38
+ == License
13
39
 
14
- If you use this software, please {make a donation}[http://blog.evanweaver.com/donate/], or {recommend Evan}[http://www.workingwithrails.com/person/7739-evan-weaver] at Working with Rails.
40
+ Copyright 2012-2013 Sergey Zhumatiy
15
41
 
42
+ Copyright 2007-2012 Cloudburst, LLC. Licensed under the AFL 3. See the included LICENSE file.
data/Rakefile CHANGED
@@ -1,9 +1,19 @@
1
+ require 'bundler'
2
+ Bundler.require(:default, :development)
3
+
1
4
  require 'echoe'
2
5
 
3
6
  Echoe.new("ccsv") do |p|
4
- p.author = "Evan Weaver"
7
+ p.author = ["Evan Weaver","Sergey Zhumatiy"]
5
8
  p.project = "evan"
9
+ p.email = "serg@parallel.ru"
6
10
  p.summary = "A pure-C CSV parser."
11
+ p.description = "Ruby CSV parser gem, written in pure C."
7
12
  p.url = "http://github.com/evan/ccsv/"
8
13
  p.docs_host = "evan.github.com/fauna/"
9
14
  end
15
+
16
+ require 'rake/testtask'
17
+ Rake::TestTask.new do |t|
18
+ t.pattern = "spec/*_spec.rb"
19
+ end
@@ -1,26 +1,23 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  Gem::Specification.new do |s|
4
- s.name = %q{ccsv}
5
- s.version = "0.1.2"
4
+ s.name = "ccsv"
5
+ s.version = "1.0.1"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
- s.authors = [%q{Evan Weaver}]
9
- s.cert_chain = [%q{/Users/eweaver/p/configuration/gem_certificates/evan_weaver-original-public_cert.pem}]
10
- s.date = %q{2012-02-24}
11
- s.description = %q{A pure-C CSV parser.}
12
- s.email = %q{}
13
- s.extensions = [%q{ext/extconf.rb}]
14
- s.extra_rdoc_files = [%q{CHANGELOG}, %q{LICENSE}, %q{README.rdoc}, %q{ext/ccsv.c}, %q{ext/ccsv.h}, %q{ext/extconf.rb}]
15
- s.files = [%q{CHANGELOG}, %q{LICENSE}, %q{Manifest}, %q{README.rdoc}, %q{Rakefile}, %q{ext/ccsv.c}, %q{ext/ccsv.h}, %q{ext/extconf.rb}, %q{test/data.csv}, %q{test/data_small.csv}, %q{test/unit/test_ccsv.rb}, %q{ccsv.gemspec}]
16
- s.homepage = %q{http://github.com/evan/ccsv/}
17
- s.rdoc_options = [%q{--line-numbers}, %q{--inline-source}, %q{--title}, %q{Ccsv}, %q{--main}, %q{README.rdoc}]
18
- s.require_paths = [%q{lib}, %q{ext}]
19
- s.rubyforge_project = %q{evan}
20
- s.rubygems_version = %q{1.8.6}
21
- s.signing_key = %q{/Users/eweaver/p/configuration/gem_certificates/evan_weaver-original-private_key.pem}
22
- s.summary = %q{A pure-C CSV parser.}
23
- s.test_files = [%q{test/unit/test_ccsv.rb}]
8
+ s.authors = ["Evan Weaver, Sergey Zhumatiy"]
9
+ s.date = "2014-01-10"
10
+ s.description = "Ruby CSV parser gem, written in pure C."
11
+ s.email = "serg@parallel.ru"
12
+ s.extensions = ["ext/extconf.rb"]
13
+ s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "README.rdoc", "ext/ccsv.c", "ext/ccsv.h", "ext/extconf.rb"]
14
+ s.files = ["CHANGELOG", "Gemfile", "Gemfile.lock", "LICENSE", "Manifest", "README.rdoc", "Rakefile", "compile", "ext/ccsv.c", "ext/ccsv.h", "ext/extconf.rb", "spec/ccsv_spec.rb", "ccsv.gemspec"]
15
+ s.homepage = "http://github.com/evan/ccsv/"
16
+ s.rdoc_options = ["--line-numbers", "--title", "Ccsv", "--main", "README.rdoc"]
17
+ s.require_paths = ["lib", "ext"]
18
+ s.rubyforge_project = "evan"
19
+ s.rubygems_version = "1.8.23"
20
+ s.summary = "A pure-C CSV parser."
24
21
 
25
22
  if s.respond_to? :specification_version then
26
23
  s.specification_version = 3
data/compile ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/sh
2
+
3
+ bundle install
4
+ rake compile
5
+ rake build
6
+ rake gem
7
+ rake test
data/ext/ccsv.c CHANGED
@@ -1,32 +1,163 @@
1
+ #include <limits.h>
1
2
  #include "ccsv.h"
2
3
 
3
4
  static VALUE rb_cC;
4
5
 
5
- static VALUE foreach(VALUE self, VALUE filename) {
6
+ /* Ccsv.foreach(filename,delimiter,[index],[range,...]) do |line| ... */
6
7
 
7
- FILE *file = fopen(StringValueCStr(filename), "r");
8
- if (file == NULL)
9
- rb_raise(rb_eRuntimeError, "File not found");
8
+ struct pair_st {
9
+ long int low,high;
10
+ };
10
11
 
12
+ #define MAX_INTERVALS 1024
13
+
14
+ static VALUE foreach(int argc, VALUE* argv, VALUE self) {
15
+ char DELIM=DEF_DELIM;
11
16
  char *line = NULL;
12
17
  size_t len = 0;
13
- char *token;
14
- int idx;
18
+ char *token,*start,*nobackslash,*t2, *str;
19
+ int idx,count,pairs_count,searchfield,flag,i,array_length,range_i,len2;
20
+ long check;
21
+ FILE *file;
22
+ ID min_method, max_method;
23
+ VALUE min_val, max_val;
24
+ VALUE tmp_value, rest_args, filename;
25
+ ID array_length_method; /*----------------------------------------*/
26
+ struct pair_st pairs[MAX_INTERVALS];
15
27
 
16
28
  VALUE ary;
17
-
29
+
30
+ rb_scan_args(argc,argv,"1*", &filename, &rest_args);
31
+
32
+ /* if (argc == 0) { // there should only be 1 or 2 arguments
33
+ rb_raise(rb_eArgError, "wrong number of arguments");
34
+ }
35
+
36
+ file = fopen(StringValueCStr(argv[0]), "r");
37
+ if (file == NULL)
38
+ rb_raise(rb_eRuntimeError, "File not found");
39
+ */
40
+ file = fopen(StringValueCStr(filename), "r");
41
+ if(file==NULL){
42
+ rb_raise(rb_eRuntimeError, "File not found");
43
+ }
44
+
45
+ if (argc >1 ) { /* delimiter */
46
+ tmp_value=rb_ary_entry(rest_args,0);
47
+ str=StringValueCStr(tmp_value);
48
+ DELIM=str[0];
49
+ }
50
+
51
+ if (argc >2 ) { /* search index */
52
+ tmp_value=rb_ary_entry(rest_args,1);
53
+ searchfield=NUM2INT(tmp_value);
54
+ }
55
+ else{
56
+ searchfield=-1;
57
+ }
58
+
59
+ min_val=rb_funcall(rest_args,rb_intern("length"), 0);
60
+ array_length=NUM2INT(min_val);
61
+ /*rb_warn("Length=%d",array_length);*/
62
+
63
+ min_method = rb_intern("min");
64
+ max_method = rb_intern("max");
65
+ /*------------test_id = rb_intern("class");*/
66
+ range_i=0;
67
+ for(idx=2;idx<array_length;++idx){
68
+ min_val=rb_funcall(rb_ary_entry(rest_args,idx),rb_intern("length"), 0);
69
+ len2=NUM2INT(min_val);
70
+ for(i=0;i<len2;++i){
71
+ VALUE e=rb_ary_entry(rb_ary_entry(rest_args,idx),i);
72
+ if(range_i>MAX_INTERVALS)
73
+ rb_raise(rb_eRuntimeError, "Too much ranges passed");
74
+ if(TYPE(e) == T_NIL){
75
+ pairs[range_i].low=LONG_MIN;
76
+ pairs[range_i].high=LONG_MAX;
77
+
78
+ continue; /* just skip nil */
79
+ }
80
+ if (! (rb_respond_to(e, min_method) & rb_respond_to(e, max_method)))
81
+ rb_raise(rb_eRuntimeError, "Not range passed to Ccsv.foreach");
82
+
83
+ min_val=rb_funcall(e, min_method, 0);
84
+ max_val=rb_funcall(e, max_method, 0);
85
+ /* rb_warn("!\n");*/
86
+ pairs[range_i].low=NUM2LONG(min_val);
87
+ /*rb_warn("2\n");*/
88
+ pairs[range_i].high=NUM2LONG(max_val);
89
+ /*rb_warn("RANGE: %ld .. %ld (%d)\n",(long)pairs[range_i].low,(long)pairs[range_i].high,(int)(range_i));*/
90
+ range_i++;
91
+ }
92
+ }
93
+ pairs_count=range_i;
94
+
18
95
  while (getline(&line, &len, file) != -1) {
96
+ /* chomp! */
97
+ if(token=index(line,EOL)){
98
+ *token='\0';
99
+ }
100
+ /*rb_warning("4\n");*/
19
101
  ary = rb_ary_new();
20
- token = strtok(line, DELIMITERS);
102
+ start=line;
103
+ nobackslash=line;
104
+ while(token=index(nobackslash, DELIM)){
105
+ /* rb_warning("5\n");*/
106
+ count=0;
107
+ t2=token-1;
108
+ while((t2>=line) && (*t2=='\\'))
109
+ {++count;--t2;}
110
+ if(count%2 ==1){ /* backslashed! skip */
111
+ nobackslash=token;
112
+ continue;
113
+ }
114
+ break;
115
+ }
21
116
  idx = 0;
117
+ flag=1;
22
118
 
23
119
  while (token != NULL) {
24
- rb_ary_store(ary, idx, rb_str_new(token, strlen(token)));
25
- idx ++;
26
- token = strtok(NULL, DELIMITERS);
120
+ *token='\0';
121
+ if(searchfield==idx){
122
+ flag=0;
123
+ /* do check! */
124
+ sscanf(start,"%ld",&check);
125
+ for(i=0;i<pairs_count;++i){
126
+ /*rb_warn("check %ld: [%ld .. %ld]",check,pairs[i].low,pairs[i].high);*/
127
+ if(pairs[i].low<check && pairs[i].high>check){
128
+ /*rb_warn("check passed");*/
129
+ flag=1; /* yahooo! */
130
+ break;
131
+ }
132
+ }
133
+ }
134
+
135
+ /* not in ranges! */
136
+ if(flag==0)
137
+ break;
138
+
139
+ rb_ary_store(ary, idx, rb_str_new(start, token-start));
140
+ idx++;
141
+ nobackslash=start=token+1;
142
+ while(token=index(nobackslash, DELIM)){
143
+ count=0;
144
+ t2=token-1;
145
+ while((t2>=line) && (*t2=='\\'))
146
+ {++count;--t2;}
147
+ if(count%2 ==1){ /* backslashed! skip */
148
+ nobackslash=token;
149
+ continue;
150
+ }
151
+ break;
152
+ }
27
153
  }
154
+ if(flag==0)
155
+ continue;
28
156
 
157
+ /* last item */
158
+ rb_ary_store(ary, idx, rb_str_new(start, strlen(start)));
29
159
  /* OBJ_FREEZE(ary); */
160
+
30
161
  rb_yield(ary);
31
162
  /* FL_UNSET((ary), FL_FREEZE); */
32
163
 
@@ -45,5 +176,7 @@ void
45
176
  Init_ccsv()
46
177
  {
47
178
  rb_cC = rb_define_class("Ccsv", rb_cObject);
48
- rb_define_singleton_method(rb_cC, "foreach", foreach, 1);
179
+ rb_define_singleton_method(rb_cC, "foreach", foreach, -1);
180
+ rb_define_const(rb_cC, "MAX", LONG2NUM(LONG_MAX));
181
+ rb_define_const(rb_cC, "MIN", LONG2NUM(LONG_MIN));
49
182
  }
data/ext/ccsv.h CHANGED
@@ -1,4 +1,7 @@
1
1
  #include "ruby.h"
2
2
 
3
- #define DELIMITERS ",\n"
3
+ /*#define DELIMITERS ",\n"*/
4
+
5
+ #define EOL '\n'
6
+ #define DEF_DELIM ','
4
7
  #define ARY_DEFAULT_SIZE 16
@@ -0,0 +1,134 @@
1
+ require 'bundler'
2
+ Bundler.require(:default, :test)
3
+
4
+ gem 'minitest'
5
+ #require 'minitest/benchmark'
6
+ require 'minitest/autorun'
7
+ #require 'minitest/spec'
8
+ require 'ccsv'
9
+ #require 'csv'
10
+
11
+ TEST_CSV="/tmp/test.csv"
12
+
13
+ module CSVScan
14
+ def self.foreach(file, &block)
15
+ open(file) do |f|
16
+ scan(f, &block)
17
+ end
18
+ end
19
+ end
20
+
21
+ def create_csv(delimiter=',')
22
+ open(TEST_CSV,"w") do |f|
23
+ 1.upto(100000) do |n|
24
+ f.puts [n,2*n,3+n].join(delimiter)
25
+ end
26
+ end
27
+ end
28
+
29
+
30
+ describe Ccsv do
31
+ before do
32
+ @csv=[]
33
+ end
34
+
35
+ it 'reads csv with default delimiter' do
36
+ create_csv
37
+ Ccsv.foreach(TEST_CSV) do |v|
38
+ @csv << v
39
+ end
40
+ @csv[15000].must_equal(['15001','30002','15004'])
41
+ @csv.size.must_equal(100000)
42
+ end
43
+
44
+ it 'reads csv with tab delimiter' do
45
+ create_csv("\t")
46
+ Ccsv.foreach(TEST_CSV,"\t") do |v|
47
+ @csv << v
48
+ end
49
+ @csv[15000].must_equal(['15001','30002','15004'])
50
+ end
51
+
52
+ it 'reads csv with comma delimiter' do
53
+ create_csv(',')
54
+ Ccsv.foreach(TEST_CSV,',') do |v|
55
+ @csv << v
56
+ end
57
+ @csv[15000].must_equal(['15001','30002','15004'])
58
+ end
59
+
60
+ it 'raises error' do
61
+ proc {
62
+ Ccsv.foreach('/non-existent-file') do |x| end
63
+ }.must_raise(RuntimeError)
64
+ end
65
+
66
+ # bench_performance_linear 'just read', 0.9 do |n|
67
+ # create_csv(',',n)
68
+ # Ccsv.foreach(TEST_CSV)
69
+ # end
70
+ end
71
+
72
+ #describe 'my benchmarks' do
73
+ #end
74
+
75
+ __END__
76
+ require 'test/unit'
77
+ require 'ccsv'
78
+ require 'benchmark'
79
+
80
+ # Yeah, I know.
81
+ begin
82
+ require 'csv'
83
+ require 'rubygems'
84
+ require 'lightcsv'
85
+ require 'csvscan'
86
+
87
+ module CSVScan
88
+ def self.foreach(file, &block)
89
+ open(file) do |f|
90
+ scan(f, &block)
91
+ end
92
+ end
93
+ end
94
+
95
+ rescue LoadError
96
+ end
97
+
98
+ class TestCcsv < Test::Unit::TestCase
99
+
100
+ def setup
101
+ @dir = "#{File.dirname(__FILE__)}/../"
102
+ end
103
+
104
+ def test_should_raise
105
+ assert_raises(RuntimeError) do
106
+ Ccsv.foreach('fdssfd') do
107
+ end
108
+ end
109
+ end
110
+
111
+ def test_accuracy
112
+ ccsv = []
113
+ file = @dir + "data_small.csv"
114
+ Ccsv.foreach(file) do |values|
115
+ ccsv << values.dup
116
+ end
117
+ csv = []
118
+ CSV.foreach(file) do |values|
119
+ csv << values
120
+ end
121
+ assert_equal csv, ccsv
122
+ end
123
+
124
+ def test_speed
125
+ Benchmark.bm(5) do |x|
126
+ [Ccsv, CSV].each do |klass| # CSVScan, LightCsv,
127
+ x.report(klass.name) do
128
+ klass.foreach(@dir + "data.csv") do |values| end
129
+ end
130
+ end
131
+ end
132
+ end
133
+
134
+ end