ccsv 0.1.2 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ccsv might be problematic. Click here for more details.
- data/CHANGELOG +4 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +23 -0
- data/Manifest +4 -3
- data/README.rdoc +32 -5
- data/Rakefile +11 -1
- data/ccsv.gemspec +15 -18
- data/compile +7 -0
- data/ext/ccsv.c +145 -12
- data/ext/ccsv.h +4 -1
- data/spec/ccsv_spec.rb +134 -0
- metadata +12 -54
- data.tar.gz.sig +0 -1
- data/test/data.csv +0 -1000000
- data/test/data_small.csv +0 -1000
- data/test/unit/test_ccsv.rb +0 -60
- metadata.gz.sig +0 -0
data/CHANGELOG
CHANGED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
GEM
|
2
|
+
specs:
|
3
|
+
allison (2.0.3)
|
4
|
+
echoe (4.6.5)
|
5
|
+
allison (>= 2.0.3)
|
6
|
+
rake (>= 0.9.2)
|
7
|
+
rdoc (>= 2.5.11)
|
8
|
+
rubyforge (>= 2.0.4)
|
9
|
+
json (1.8.1)
|
10
|
+
json_pure (1.8.1)
|
11
|
+
minitest (5.2.1)
|
12
|
+
rake (10.1.0)
|
13
|
+
rdoc (4.0.1)
|
14
|
+
json (~> 1.4)
|
15
|
+
rubyforge (2.0.4)
|
16
|
+
json_pure (>= 1.1.7)
|
17
|
+
|
18
|
+
PLATFORMS
|
19
|
+
ruby
|
20
|
+
|
21
|
+
DEPENDENCIES
|
22
|
+
echoe
|
23
|
+
minitest
|
data/Manifest
CHANGED
data/README.rdoc
CHANGED
@@ -3,13 +3,40 @@ Ccsv
|
|
3
3
|
|
4
4
|
A pure-C CSV parser.
|
5
5
|
|
6
|
-
|
6
|
+
== Installation
|
7
7
|
|
8
|
-
|
8
|
+
gem install ccsv
|
9
|
+
|
10
|
+
== Usage
|
11
|
+
|
12
|
+
require 'rubygems'
|
13
|
+
require 'ccsv'
|
14
|
+
|
15
|
+
Ccsv.foreach("data.csv") do |line|
|
16
|
+
# Do something with the line array
|
17
|
+
end
|
18
|
+
|
19
|
+
# print all logins
|
20
|
+
Ccsv.foreach("/etc/passwd",":") do |line|
|
21
|
+
puts line[0]
|
22
|
+
end
|
9
23
|
|
10
|
-
|
24
|
+
== Advanced usage
|
11
25
|
|
12
|
-
|
26
|
+
Get users from passwd file, with UIDs between 1000 and 1010 and between 2000 and 2010.
|
27
|
+
Works with numbers only!
|
28
|
+
The third argument is the index of the column used for filtering; it is followed by one or more arrays of ranges.
|
29
|
+
|
30
|
+
Ccsv.foreach("/etc/passwd",":",2,[1000..1010],[2000..2010]) do |line|
|
31
|
+
puts line[0]
|
32
|
+
end
|
33
|
+
|
34
|
+
== Contacts
|
35
|
+
|
36
|
+
Forks, pull requests and other contacts via GitHub: http://github.com/evan/ccsv/
|
37
|
+
|
38
|
+
== License
|
13
39
|
|
14
|
-
|
40
|
+
Copyright 2012-2013 Sergey Zhumatiy
|
15
41
|
|
42
|
+
Copyright 2007-2012 Cloudburst, LLC. Licensed under the AFL 3. See the included LICENSE file.
|
data/Rakefile
CHANGED
@@ -1,9 +1,19 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler.require(:default, :development)
|
3
|
+
|
1
4
|
require 'echoe'
|
2
5
|
|
3
6
|
Echoe.new("ccsv") do |p|
|
4
|
-
p.author = "Evan Weaver"
|
7
|
+
p.author = ["Evan Weaver","Sergey Zhumatiy"]
|
5
8
|
p.project = "evan"
|
9
|
+
p.email = "serg@parallel.ru"
|
6
10
|
p.summary = "A pure-C CSV parser."
|
11
|
+
p.description = "Ruby CSV parser gem, written in pure C."
|
7
12
|
p.url = "http://github.com/evan/ccsv/"
|
8
13
|
p.docs_host = "evan.github.com/fauna/"
|
9
14
|
end
|
15
|
+
|
16
|
+
require 'rake/testtask'
|
17
|
+
Rake::TestTask.new do |t|
|
18
|
+
t.pattern = "spec/*_spec.rb"
|
19
|
+
end
|
data/ccsv.gemspec
CHANGED
@@ -1,26 +1,23 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
|
-
s.name =
|
5
|
-
s.version = "0.1
|
4
|
+
s.name = "ccsv"
|
5
|
+
s.version = "1.0.1"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
-
s.authors = [
|
9
|
-
s.
|
10
|
-
s.
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
s.
|
14
|
-
s.
|
15
|
-
s.
|
16
|
-
s.
|
17
|
-
s.
|
18
|
-
s.
|
19
|
-
s.
|
20
|
-
s.
|
21
|
-
s.signing_key = %q{/Users/eweaver/p/configuration/gem_certificates/evan_weaver-original-private_key.pem}
|
22
|
-
s.summary = %q{A pure-C CSV parser.}
|
23
|
-
s.test_files = [%q{test/unit/test_ccsv.rb}]
|
8
|
+
s.authors = ["Evan Weaver, Sergey Zhumatiy"]
|
9
|
+
s.date = "2014-01-10"
|
10
|
+
s.description = "Ruby CSV parser gem, written in pure C."
|
11
|
+
s.email = "serg@parallel.ru"
|
12
|
+
s.extensions = ["ext/extconf.rb"]
|
13
|
+
s.extra_rdoc_files = ["CHANGELOG", "LICENSE", "README.rdoc", "ext/ccsv.c", "ext/ccsv.h", "ext/extconf.rb"]
|
14
|
+
s.files = ["CHANGELOG", "Gemfile", "Gemfile.lock", "LICENSE", "Manifest", "README.rdoc", "Rakefile", "compile", "ext/ccsv.c", "ext/ccsv.h", "ext/extconf.rb", "spec/ccsv_spec.rb", "ccsv.gemspec"]
|
15
|
+
s.homepage = "http://github.com/evan/ccsv/"
|
16
|
+
s.rdoc_options = ["--line-numbers", "--title", "Ccsv", "--main", "README.rdoc"]
|
17
|
+
s.require_paths = ["lib", "ext"]
|
18
|
+
s.rubyforge_project = "evan"
|
19
|
+
s.rubygems_version = "1.8.23"
|
20
|
+
s.summary = "A pure-C CSV parser."
|
24
21
|
|
25
22
|
if s.respond_to? :specification_version then
|
26
23
|
s.specification_version = 3
|
data/compile
ADDED
data/ext/ccsv.c
CHANGED
@@ -1,32 +1,163 @@
|
|
1
|
+
#include <limits.h>
|
1
2
|
#include "ccsv.h"
|
2
3
|
|
3
4
|
static VALUE rb_cC;
|
4
5
|
|
5
|
-
|
6
|
+
/* Ccsv.foreach(filename,delimiter,[index],[range,...]) do |line| ... */
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
8
|
+
struct pair_st {
|
9
|
+
long int low,high;
|
10
|
+
};
|
10
11
|
|
12
|
+
#define MAX_INTERVALS 1024
|
13
|
+
|
14
|
+
static VALUE foreach(int argc, VALUE* argv, VALUE self) {
|
15
|
+
char DELIM=DEF_DELIM;
|
11
16
|
char *line = NULL;
|
12
17
|
size_t len = 0;
|
13
|
-
char *token;
|
14
|
-
int idx;
|
18
|
+
char *token,*start,*nobackslash,*t2, *str;
|
19
|
+
int idx,count,pairs_count,searchfield,flag,i,array_length,range_i,len2;
|
20
|
+
long check;
|
21
|
+
FILE *file;
|
22
|
+
ID min_method, max_method;
|
23
|
+
VALUE min_val, max_val;
|
24
|
+
VALUE tmp_value, rest_args, filename;
|
25
|
+
ID array_length_method; /*----------------------------------------*/
|
26
|
+
struct pair_st pairs[MAX_INTERVALS];
|
15
27
|
|
16
28
|
VALUE ary;
|
17
|
-
|
29
|
+
|
30
|
+
rb_scan_args(argc,argv,"1*", &filename, &rest_args);
|
31
|
+
|
32
|
+
/* if (argc == 0) { // there should only be 1 or 2 arguments
|
33
|
+
rb_raise(rb_eArgError, "wrong number of arguments");
|
34
|
+
}
|
35
|
+
|
36
|
+
file = fopen(StringValueCStr(argv[0]), "r");
|
37
|
+
if (file == NULL)
|
38
|
+
rb_raise(rb_eRuntimeError, "File not found");
|
39
|
+
*/
|
40
|
+
file = fopen(StringValueCStr(filename), "r");
|
41
|
+
if(file==NULL){
|
42
|
+
rb_raise(rb_eRuntimeError, "File not found");
|
43
|
+
}
|
44
|
+
|
45
|
+
if (argc >1 ) { /* delimiter */
|
46
|
+
tmp_value=rb_ary_entry(rest_args,0);
|
47
|
+
str=StringValueCStr(tmp_value);
|
48
|
+
DELIM=str[0];
|
49
|
+
}
|
50
|
+
|
51
|
+
if (argc >2 ) { /* search index */
|
52
|
+
tmp_value=rb_ary_entry(rest_args,1);
|
53
|
+
searchfield=NUM2INT(tmp_value);
|
54
|
+
}
|
55
|
+
else{
|
56
|
+
searchfield=-1;
|
57
|
+
}
|
58
|
+
|
59
|
+
min_val=rb_funcall(rest_args,rb_intern("length"), 0);
|
60
|
+
array_length=NUM2INT(min_val);
|
61
|
+
/*rb_warn("Length=%d",array_length);*/
|
62
|
+
|
63
|
+
min_method = rb_intern("min");
|
64
|
+
max_method = rb_intern("max");
|
65
|
+
/*------------test_id = rb_intern("class");*/
|
66
|
+
range_i=0;
|
67
|
+
for(idx=2;idx<array_length;++idx){
|
68
|
+
min_val=rb_funcall(rb_ary_entry(rest_args,idx),rb_intern("length"), 0);
|
69
|
+
len2=NUM2INT(min_val);
|
70
|
+
for(i=0;i<len2;++i){
|
71
|
+
VALUE e=rb_ary_entry(rb_ary_entry(rest_args,idx),i);
|
72
|
+
if(range_i>MAX_INTERVALS)
|
73
|
+
rb_raise(rb_eRuntimeError, "Too much ranges passed");
|
74
|
+
if(TYPE(e) == T_NIL){
|
75
|
+
pairs[range_i].low=LONG_MIN;
|
76
|
+
pairs[range_i].high=LONG_MAX;
|
77
|
+
|
78
|
+
continue; /* just skip nil */
|
79
|
+
}
|
80
|
+
if (! (rb_respond_to(e, min_method) & rb_respond_to(e, max_method)))
|
81
|
+
rb_raise(rb_eRuntimeError, "Not range passed to Ccsv.foreach");
|
82
|
+
|
83
|
+
min_val=rb_funcall(e, min_method, 0);
|
84
|
+
max_val=rb_funcall(e, max_method, 0);
|
85
|
+
/* rb_warn("!\n");*/
|
86
|
+
pairs[range_i].low=NUM2LONG(min_val);
|
87
|
+
/*rb_warn("2\n");*/
|
88
|
+
pairs[range_i].high=NUM2LONG(max_val);
|
89
|
+
/*rb_warn("RANGE: %ld .. %ld (%d)\n",(long)pairs[range_i].low,(long)pairs[range_i].high,(int)(range_i));*/
|
90
|
+
range_i++;
|
91
|
+
}
|
92
|
+
}
|
93
|
+
pairs_count=range_i;
|
94
|
+
|
18
95
|
while (getline(&line, &len, file) != -1) {
|
96
|
+
/* chomp! */
|
97
|
+
if(token=index(line,EOL)){
|
98
|
+
*token='\0';
|
99
|
+
}
|
100
|
+
/*rb_warning("4\n");*/
|
19
101
|
ary = rb_ary_new();
|
20
|
-
|
102
|
+
start=line;
|
103
|
+
nobackslash=line;
|
104
|
+
while(token=index(nobackslash, DELIM)){
|
105
|
+
/* rb_warning("5\n");*/
|
106
|
+
count=0;
|
107
|
+
t2=token-1;
|
108
|
+
while((t2>=line) && (*t2=='\\'))
|
109
|
+
{++count;--t2;}
|
110
|
+
if(count%2 ==1){ /* backslashed! skip */
|
111
|
+
nobackslash=token;
|
112
|
+
continue;
|
113
|
+
}
|
114
|
+
break;
|
115
|
+
}
|
21
116
|
idx = 0;
|
117
|
+
flag=1;
|
22
118
|
|
23
119
|
while (token != NULL) {
|
24
|
-
|
25
|
-
idx
|
26
|
-
|
120
|
+
*token='\0';
|
121
|
+
if(searchfield==idx){
|
122
|
+
flag=0;
|
123
|
+
/* do check! */
|
124
|
+
sscanf(start,"%ld",&check);
|
125
|
+
for(i=0;i<pairs_count;++i){
|
126
|
+
/*rb_warn("check %ld: [%ld .. %ld]",check,pairs[i].low,pairs[i].high);*/
|
127
|
+
if(pairs[i].low<check && pairs[i].high>check){
|
128
|
+
/*rb_warn("check passed");*/
|
129
|
+
flag=1; /* yahooo! */
|
130
|
+
break;
|
131
|
+
}
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
/* not in ranges! */
|
136
|
+
if(flag==0)
|
137
|
+
break;
|
138
|
+
|
139
|
+
rb_ary_store(ary, idx, rb_str_new(start, token-start));
|
140
|
+
idx++;
|
141
|
+
nobackslash=start=token+1;
|
142
|
+
while(token=index(nobackslash, DELIM)){
|
143
|
+
count=0;
|
144
|
+
t2=token-1;
|
145
|
+
while((t2>=line) && (*t2=='\\'))
|
146
|
+
{++count;--t2;}
|
147
|
+
if(count%2 ==1){ /* backslashed! skip */
|
148
|
+
nobackslash=token;
|
149
|
+
continue;
|
150
|
+
}
|
151
|
+
break;
|
152
|
+
}
|
27
153
|
}
|
154
|
+
if(flag==0)
|
155
|
+
continue;
|
28
156
|
|
157
|
+
/* last item */
|
158
|
+
rb_ary_store(ary, idx, rb_str_new(start, strlen(start)));
|
29
159
|
/* OBJ_FREEZE(ary); */
|
160
|
+
|
30
161
|
rb_yield(ary);
|
31
162
|
/* FL_UNSET((ary), FL_FREEZE); */
|
32
163
|
|
@@ -45,5 +176,7 @@ void
|
|
45
176
|
Init_ccsv()
|
46
177
|
{
|
47
178
|
rb_cC = rb_define_class("Ccsv", rb_cObject);
|
48
|
-
rb_define_singleton_method(rb_cC, "foreach", foreach, 1);
|
179
|
+
rb_define_singleton_method(rb_cC, "foreach", foreach, -1);
|
180
|
+
rb_define_const(rb_cC, "MAX", LONG2NUM(LONG_MAX));
|
181
|
+
rb_define_const(rb_cC, "MIN", LONG2NUM(LONG_MIN));
|
49
182
|
}
|
data/ext/ccsv.h
CHANGED
data/spec/ccsv_spec.rb
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
Bundler.require(:default, :test)
|
3
|
+
|
4
|
+
gem 'minitest'
|
5
|
+
#require 'minitest/benchmark'
|
6
|
+
require 'minitest/autorun'
|
7
|
+
#require 'minitest/spec'
|
8
|
+
require 'ccsv'
|
9
|
+
#require 'csv'
|
10
|
+
|
11
|
+
TEST_CSV="/tmp/test.csv"
|
12
|
+
|
13
|
+
module CSVScan
|
14
|
+
def self.foreach(file, &block)
|
15
|
+
open(file) do |f|
|
16
|
+
scan(f, &block)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def create_csv(delimiter=',')
|
22
|
+
open(TEST_CSV,"w") do |f|
|
23
|
+
1.upto(100000) do |n|
|
24
|
+
f.puts [n,2*n,3+n].join(delimiter)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
describe Ccsv do
|
31
|
+
before do
|
32
|
+
@csv=[]
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'reads csv with default delimiter' do
|
36
|
+
create_csv
|
37
|
+
Ccsv.foreach(TEST_CSV) do |v|
|
38
|
+
@csv << v
|
39
|
+
end
|
40
|
+
@csv[15000].must_equal(['15001','30002','15004'])
|
41
|
+
@csv.size.must_equal(100000)
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'reads csv with tab delimiter' do
|
45
|
+
create_csv("\t")
|
46
|
+
Ccsv.foreach(TEST_CSV,"\t") do |v|
|
47
|
+
@csv << v
|
48
|
+
end
|
49
|
+
@csv[15000].must_equal(['15001','30002','15004'])
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'reads csv with comma delimiter' do
|
53
|
+
create_csv(',')
|
54
|
+
Ccsv.foreach(TEST_CSV,',') do |v|
|
55
|
+
@csv << v
|
56
|
+
end
|
57
|
+
@csv[15000].must_equal(['15001','30002','15004'])
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'raises error' do
|
61
|
+
proc {
|
62
|
+
Ccsv.foreach('/non-existent-file') do |x| end
|
63
|
+
}.must_raise(RuntimeError)
|
64
|
+
end
|
65
|
+
|
66
|
+
# bench_performance_linear 'just read', 0.9 do |n|
|
67
|
+
# create_csv(',',n)
|
68
|
+
# Ccsv.foreach(TEST_CSV)
|
69
|
+
# end
|
70
|
+
end
|
71
|
+
|
72
|
+
#describe 'my benchmarks' do
|
73
|
+
#end
|
74
|
+
|
75
|
+
__END__
|
76
|
+
require 'test/unit'
|
77
|
+
require 'ccsv'
|
78
|
+
require 'benchmark'
|
79
|
+
|
80
|
+
# Yeah, I know.
|
81
|
+
begin
|
82
|
+
require 'csv'
|
83
|
+
require 'rubygems'
|
84
|
+
require 'lightcsv'
|
85
|
+
require 'csvscan'
|
86
|
+
|
87
|
+
module CSVScan
|
88
|
+
def self.foreach(file, &block)
|
89
|
+
open(file) do |f|
|
90
|
+
scan(f, &block)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
rescue LoadError
|
96
|
+
end
|
97
|
+
|
98
|
+
class TestCcsv < Test::Unit::TestCase
|
99
|
+
|
100
|
+
def setup
|
101
|
+
@dir = "#{File.dirname(__FILE__)}/../"
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_should_raise
|
105
|
+
assert_raises(RuntimeError) do
|
106
|
+
Ccsv.foreach('fdssfd') do
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def test_accuracy
|
112
|
+
ccsv = []
|
113
|
+
file = @dir + "data_small.csv"
|
114
|
+
Ccsv.foreach(file) do |values|
|
115
|
+
ccsv << values.dup
|
116
|
+
end
|
117
|
+
csv = []
|
118
|
+
CSV.foreach(file) do |values|
|
119
|
+
csv << values
|
120
|
+
end
|
121
|
+
assert_equal csv, ccsv
|
122
|
+
end
|
123
|
+
|
124
|
+
def test_speed
|
125
|
+
Benchmark.bm(5) do |x|
|
126
|
+
[Ccsv, CSV].each do |klass| # CSVScan, LightCsv,
|
127
|
+
x.report(klass.name) do
|
128
|
+
klass.foreach(@dir + "data.csv") do |values| end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
end
|