fastest-csv 0.0.2-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ .DS_Store
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
19
+ lib/*.bundle
20
+ lib/*.jar
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fastest-csv.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Maarten Oelering
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,68 @@
1
+ # FastestCSV
2
+
3
+ Fastest CSV class for MRI Ruby and JRuby. Faster than faster_csv and fasterer-csv.
4
+
5
+ Uses native C code to parse CSV lines in MRI Ruby and Java in JRuby.
6
+
7
+ Supports standard CSV as specified in RFC 4180, not the looser "csv" dialect produced by Excel.
8
+
9
+ The interface is a subset of the CSV interface in Ruby 1.9.3. The options parameter is not supported.
10
+
11
+ Originally developed to parse large CSV log files from PowerMTA.
12
+
13
+ ## Installation
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ gem 'fastest-csv'
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install fastest-csv
26
+
27
+ ## Usage
28
+
29
+ Parse single line
30
+
31
+ FastestCSV.parse_line("one,two,three")
32
+ => ["one", "two", "three"]
33
+
34
+ "one,two,three".parse_csv
35
+ => ["one", "two", "three"]
36
+
37
+ Parse file without header
38
+
39
+ FastestCSV.open("path/to/file.csv") do |csv|
40
+ while row = csv.shift
41
+ #
42
+ end
43
+ end
44
+
45
+ Parse file with header
46
+
47
+ FastestCSV.open("path/to/file.csv") do |csv|
48
+ fields = csv.shift
49
+ while values = csv.shift
50
+ #
51
+ end
52
+ end
53
+
54
+ Parse file into an array of arrays
55
+
56
+ rows = FastestCSV.read("path/to/file.csv")
57
+
58
+ Parse string into an array of arrays
59
+
60
+ rows = FastestCSV.parse(csv_data)
61
+
62
+ ## Contributing
63
+
64
+ 1. Fork it
65
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
66
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
67
+ 4. Push to the branch (`git push origin my-new-feature`)
68
+ 5. Create new Pull Request
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ spec = Gem::Specification.load('fastest-csv.gemspec')
5
+
6
+ if RUBY_PLATFORM =~ /java/
7
+ require 'rake/javaextensiontask'
8
+ Rake::JavaExtensionTask.new('csv_parser', spec)
9
+ else
10
+ require 'rake/extensiontask'
11
+ Rake::ExtensionTask.new('csv_parser', spec)
12
+ end
13
+
14
+ require 'rake/testtask'
15
+ Rake::TestTask.new do |t|
16
+ t.libs << "test"
17
+ t.test_files = FileList['test/tc_*.rb']
18
+ #test.libs << 'lib' << 'test'
19
+ #test.pattern = 'test/**/test_*.rb'
20
+ #test.verbose = true
21
+ end
22
+
@@ -0,0 +1,112 @@
1
+ //
2
+ // Copyright (c) Maarten Oelering, BrightCode BV
3
+ //
4
+
5
+ package org.brightcode;
6
+
7
+ import java.io.IOException;
8
+
9
+ import org.jruby.Ruby;
10
+ import org.jruby.RubyArray;
11
+ import org.jruby.RubyModule;
12
+ import org.jruby.RubyString;
13
+ import org.jruby.runtime.Block;
14
+ import org.jruby.runtime.CallbackFactory;
15
+ import org.jruby.runtime.builtin.IRubyObject;
16
+ import org.jruby.runtime.load.BasicLibraryService;
17
+
18
+ public class CsvParserService implements BasicLibraryService {
19
+
20
+ private Ruby runtime;
21
+
22
+ private static int UNQUOTED = 0;
23
+ private static int IN_QUOTED = 1;
24
+ private static int QUOTE_IN_QUOTED = 2;
25
+
26
+ // Initial setup function. Takes a reference to the current JRuby runtime and
27
+ // sets up our modules.
28
+ public boolean basicLoad(Ruby runtime) throws IOException {
29
+ this.runtime = runtime;
30
+
31
+ RubyModule mCsvParser = runtime.defineModule("CsvParser");
32
+ CallbackFactory callbackFactory = runtime.callbackFactory(CsvParserService.class);
33
+ mCsvParser.defineModuleFunction("parse_line",
34
+ callbackFactory.getSingletonMethod("parseLine", RubyString.class));
35
+ return true;
36
+ }
37
+
38
+ public static IRubyObject parseLine(IRubyObject recv, RubyString line, Block unusedBlock) {
39
+ Ruby runtime = recv.getRuntime();
40
+
41
+ CharSequence seq = line.getValue();
42
+ int length = seq.length();
43
+ if (length == 0)
44
+ return runtime.getNil();
45
+
46
+ int state = UNQUOTED;
47
+ StringBuilder value = new StringBuilder(length); // field value, no longer than line
48
+ RubyArray array = RubyArray.newArray(runtime, 36);
49
+
50
+ for (int i = 0; i < length; i++) {
51
+ char c = seq.charAt(i);
52
+ switch (c) {
53
+ case ',':
54
+ if (state == UNQUOTED) {
55
+ if (value.length() == 0) {
56
+ array.append(runtime.getNil());
57
+ }
58
+ else {
59
+ array.append(RubyString.newString(runtime, value));
60
+ value.setLength(0);
61
+ }
62
+ }
63
+ else if (state == IN_QUOTED) {
64
+ value.append(c);
65
+ }
66
+ else if (state == 2) {
67
+ array.append(RubyString.newString(runtime, value));
68
+ value.setLength(0);
69
+ state = UNQUOTED;
70
+ }
71
+ break;
72
+ case '"':
73
+ if (state == UNQUOTED) {
74
+ state = IN_QUOTED;
75
+ }
76
+ else if (state == IN_QUOTED) {
77
+ state = QUOTE_IN_QUOTED;
78
+ }
79
+ else if (state == QUOTE_IN_QUOTED) {
80
+ value.append(c); // escaped quote
81
+ state = IN_QUOTED;
82
+ }
83
+ break;
84
+ case '\r':
85
+ case '\n':
86
+ if (state == IN_QUOTED) {
87
+ value.append(c);
88
+ }
89
+ else {
90
+ i = length; // only parse first line if multiline
91
+ }
92
+ break;
93
+ default:
94
+ value.append(c);
95
+ break;
96
+ }
97
+ }
98
+ if (state == UNQUOTED) {
99
+ if (value.length() == 0) {
100
+ array.append(runtime.getNil());
101
+ }
102
+ else {
103
+ array.append(RubyString.newString(runtime, value));
104
+ value.setLength(0);
105
+ }
106
+ }
107
+ else if (state == QUOTE_IN_QUOTED) {
108
+ array.append(RubyString.newString(runtime, value));
109
+ }
110
+ return array;
111
+ }
112
+ }
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'mkmf'
4
+ extension_name = 'csv_parser'
5
+ #dir_config(extension_name)
6
+
7
+ if RUBY_VERSION =~ /1.8/ then
8
+ $CPPFLAGS += " -DRUBY_18"
9
+ end
10
+
11
+ #if CONFIG["arch"] =~ /mswin32|mingw/
12
+ # $CFLAGS += " -march=i686"
13
+ #end
14
+
15
+ create_makefile(extension_name)
@@ -0,0 +1,103 @@
1
+ /*
2
+ * Copyright (c) Maarten Oelering, BrightCode BV
3
+ */
4
+
5
+ #include "ruby.h"
6
+ #ifdef RUBY_18
7
+ #include "rubyio.h"
8
+ #else
9
+ #include "ruby/io.h"
10
+ #endif
11
+
12
+ /* default allocated size is 16 */
13
+ #define DEF_ARRAY_LEN 32
14
+
15
+ #define UNQUOTED 0
16
+ #define IN_QUOTED 1
17
+ #define QUOTE_IN_QUOTED 2
18
+
19
+ /*
20
+ static VALUE cFastestCSV;
21
+ */
22
+ static VALUE mCsvParser;
23
+
24
+ static VALUE parse_line(VALUE self, VALUE str)
25
+ {
26
+ if (NIL_P(str))
27
+ return Qnil;
28
+
29
+ const char *ptr = RSTRING_PTR(str);
30
+ int len = (int) RSTRING_LEN(str); /* cast to prevent warning in 64-bit OS */
31
+
32
+ if (len == 0)
33
+ return Qnil;
34
+
35
+ VALUE array = rb_ary_new2(DEF_ARRAY_LEN);
36
+ char value[len]; /* field value, no longer than line */
37
+ int state = 0;
38
+ int index = 0;
39
+ int i;
40
+ char c;
41
+ for (i = 0; i < len; i++)
42
+ {
43
+ c = ptr[i];
44
+ switch (c)
45
+ {
46
+ case ',':
47
+ if (state == UNQUOTED) {
48
+ rb_ary_push(array, (index == 0 ? Qnil: rb_str_new(value, index)));
49
+ index = 0;
50
+ }
51
+ else if (state == IN_QUOTED) {
52
+ value[index++] = c;
53
+ }
54
+ else if (state == QUOTE_IN_QUOTED) {
55
+ rb_ary_push(array, rb_str_new(value, index));
56
+ index = 0;
57
+ state = UNQUOTED;
58
+ }
59
+ break;
60
+ case '"':
61
+ if (state == UNQUOTED) {
62
+ state = IN_QUOTED;
63
+ }
64
+ else if (state == 1) {
65
+ state = QUOTE_IN_QUOTED;
66
+ }
67
+ else if (state == QUOTE_IN_QUOTED) {
68
+ value[index++] = c; /* escaped quote */
69
+ state = IN_QUOTED;
70
+ }
71
+ break;
72
+ case 13: /* \r */
73
+ case 10: /* \n */
74
+ if (state == IN_QUOTED) {
75
+ value[index++] = c;
76
+ }
77
+ else {
78
+ i = len; /* only parse first line if multiline */
79
+ }
80
+ break;
81
+ default:
82
+ value[index++] = c;
83
+ }
84
+ }
85
+
86
+ if (state == UNQUOTED) {
87
+ rb_ary_push(array, (index == 0 ? Qnil: rb_str_new(value, index)));
88
+ }
89
+ else if (state == QUOTE_IN_QUOTED) {
90
+ rb_ary_push(array, rb_str_new(value, index));
91
+ }
92
+ return array;
93
+ }
94
+
95
+ void Init_csv_parser()
96
+ {
97
+ /*
98
+ cFastestCSV = rb_define_class("FastestCSV", rb_cObject);
99
+ rb_define_singleton_method(cFastestCSV, "parse_line", parse_line, 1);
100
+ */
101
+ mCsvParser = rb_define_module("CsvParser");
102
+ rb_define_module_function(mCsvParser, "parse_line", parse_line, 1);
103
+ }
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/fastest-csv/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Maarten Oelering"]
6
+ gem.email = ["maarten@brightcode.nl"]
7
+ gem.description = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
8
+ gem.summary = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
9
+ gem.homepage = "https://github.com/brightcode/fastest-csv"
10
+
11
+ gem.files = `git ls-files`.split($\)
12
+ #gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
+ gem.name = "fastest-csv"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = FastestCSV::VERSION
17
+
18
+ if RUBY_PLATFORM =~ /java/
19
+ gem.platform = "java"
20
+ gem.files << "lib/csv_parser.jar"
21
+ else
22
+ gem.extensions = ['ext/csv_parser/extconf.rb']
23
+ end
24
+
25
+ gem.add_development_dependency "rake-compiler"
26
+ end
@@ -0,0 +1 @@
1
+ require 'fastest_csv'
@@ -0,0 +1,3 @@
1
+ class FastestCSV
2
+ VERSION = "0.0.2"
3
+ end
@@ -0,0 +1,111 @@
1
+ # This loads either csv_parser.so, csv_parser.bundle or
2
+ # csv_parser.jar, depending on your Ruby platform and OS
3
+ require 'csv_parser'
4
+ require 'stringio'
5
+
6
+ # Fast CSV parser using native code
7
+ class FastestCSV
8
+
9
+ if RUBY_PLATFORM =~ /java/
10
+ require 'jruby'
11
+ org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
12
+ end
13
+
14
+ # Pass each line of the specified +path+ as array to the provided +block+
15
+ def self.foreach(path, &block)
16
+ open(path) do |reader|
17
+ reader.each(&block)
18
+ end
19
+ end
20
+
21
+ # Opens a csv file. Pass a FastestCSV instance to the provided block,
22
+ # or return it when no block is provided
23
+ def self.open(path, mode = "rb")
24
+ csv = new(File.open(path, mode))
25
+ if block_given?
26
+ begin
27
+ yield csv
28
+ ensure
29
+ csv.close
30
+ end
31
+ else
32
+ csv
33
+ end
34
+ end
35
+
36
+ # Read all lines from the specified +path+ into an array of arrays
37
+ def self.read(path)
38
+ open(path, "rb") { |csv| csv.read }
39
+ end
40
+
41
+ # Alias for FastestCSV.read
42
+ def self.readlines(path)
43
+ read(path)
44
+ end
45
+
46
+ # Parse all lines of the specified String into an array of arrays, or pass each row to the block if one is given
47
+ def self.parse(data, &block)
48
+ csv = new(StringIO.new(data))
49
+ if block.nil?
50
+ begin
51
+ csv.read
52
+ ensure
53
+ csv.close
54
+ end
55
+ else
56
+ csv.each(&block)
57
+ end
58
+ end
59
+
60
+ def self.parse_line(line)
61
+ ::CsvParser.parse_line(line)
62
+ end
63
+
64
+ # Create new FastestCSV wrapping the specified IO object
65
+ def initialize(io)
66
+ @io = io
67
+ end
68
+
69
+ # Read from the wrapped IO passing each line as array to the specified block
70
+ def each
71
+ while row = shift
72
+ yield row
73
+ end
74
+ end
75
+
76
+ # Read all remaining lines from the wrapped IO into an array of arrays
77
+ def read
78
+ table = Array.new
79
+ each {|row| table << row}
80
+ table
81
+ end
82
+ alias_method :readlines, :read
83
+
84
+ # Read next line from the wrapped IO and return as array or nil at EOF
85
+ def shift
86
+ if line = @io.gets
87
+ ::CsvParser.parse_line(line)
88
+ else
89
+ nil
90
+ end
91
+ end
92
+ alias_method :gets, :shift
93
+ alias_method :readline, :shift
94
+
95
+ # Close the wrapped IO
96
+ def close
97
+ @io.close
98
+ end
99
+
100
+ def closed?
101
+ @io.closed?
102
+ end
103
+ end
104
+
105
+ class String
106
+ # Equivalent to <tt>FastestCSV.parse_line(self)</tt>: parses this string as a single CSV line
107
+ def parse_csv
108
+ ::CsvParser.parse_line(self)
109
+ end
110
+ end
111
+
@@ -0,0 +1,126 @@
1
+ #
2
+ # Tests copied from faster_csv by James Edward Gray II
3
+ #
4
+
5
+ require 'test/unit'
6
+ require 'fastest_csv'
7
+
8
+ #
9
+ # Following tests are my interpretation of the
10
+ # {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that
11
+ # document in one place (intentionally) and that is to make the default row
12
+ # separator <tt>$/</tt>.
13
+ #
14
+ class TestCSVParsing < Test::Unit::TestCase
15
+
16
+ def test_mastering_regex_example
17
+ ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
18
+ assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
19
+ "It's \"10 Grand\", baby", "10K" ],
20
+ CsvParser.parse_line(ex) )
21
+ end
22
+
23
+ # Pulled from: http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/ruby/test/csv/test_csv.rb?rev=1.12.2.2;content-type=text%2Fplain
24
+ def test_std_lib_csv
25
+ [ ["\t", ["\t"]],
26
+ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
27
+ ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
28
+ ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
29
+ ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
30
+ ["\"\"", [""]],
31
+ ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
32
+ ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
33
+ ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
34
+ ["foo,\"\",baz", ["foo", "", "baz"]],
35
+ ["\",\"", [","]],
36
+ ["foo", ["foo"]],
37
+ [",,", [nil, nil, nil]],
38
+ [",", [nil, nil]],
39
+ ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
40
+ ["foo,,baz", ["foo", nil, "baz"]],
41
+ ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
42
+ ["\",\",\",\"", [",", ","]],
43
+ ["foo,bar,", ["foo", "bar", nil]],
44
+ [",foo,bar", [nil, "foo", "bar"]],
45
+ ["foo,bar", ["foo", "bar"]],
46
+ [";", [";"]],
47
+ ["\t,\t", ["\t", "\t"]],
48
+ ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
49
+ ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
50
+ ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
51
+ [";,;", [";", ";"]] ].each do |csv_test|
52
+ assert_equal(csv_test.last, CsvParser.parse_line(csv_test.first))
53
+ end
54
+
55
+ [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
56
+ ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
57
+ ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
58
+ ["\"\"", [""]],
59
+ ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
60
+ ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
61
+ ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
62
+ ["foo,\"\",baz", ["foo", "", "baz"]],
63
+ ["foo", ["foo"]],
64
+ [",,", [nil, nil, nil]],
65
+ [",", [nil, nil]],
66
+ ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
67
+ ["foo,,baz", ["foo", nil, "baz"]],
68
+ ["foo,bar", ["foo", "bar"]],
69
+ ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
70
+ ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test|
71
+ assert_equal(csv_test.last, CsvParser.parse_line(csv_test.first))
72
+ end
73
+ end
74
+
75
+ # From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
76
+ def test_aras_edge_cases
77
+ [ [%Q{a,b}, ["a", "b"]],
78
+ [%Q{a,"""b"""}, ["a", "\"b\""]],
79
+ [%Q{a,"""b"}, ["a", "\"b"]],
80
+ [%Q{a,"b"""}, ["a", "b\""]],
81
+ [%Q{a,"\nb"""}, ["a", "\nb\""]],
82
+ [%Q{a,"""\nb"}, ["a", "\"\nb"]],
83
+ [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
84
+ [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
85
+ [%Q{a,,,}, ["a", nil, nil, nil]],
86
+ [%Q{,}, [nil, nil]],
87
+ [%Q{"",""}, ["", ""]],
88
+ [%Q{""""}, ["\""]],
89
+ [%Q{"""",""}, ["\"",""]],
90
+ [%Q{,""}, [nil,""]],
91
+ [%Q{,"\r"}, [nil,"\r"]],
92
+ [%Q{"\r\n,"}, ["\r\n,"]],
93
+ [%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case|
94
+ assert_equal(edge_case.last, CsvParser.parse_line(edge_case.first))
95
+ end
96
+ end
97
+
98
+ def test_james_edge_cases
99
+ # A read at eof? should return nil.
100
+ assert_equal(nil, CsvParser.parse_line(""))
101
+ #
102
+ # With CSV it's impossible to tell an empty line from a line containing a
103
+ # single +nil+ field. The standard CSV library returns <tt>[nil]</tt>
104
+ # in these cases, but <tt>Array.new</tt> makes more sense to me.
105
+ #
106
+ #assert_equal(Array.new, FastestCSV.parse_line("\n1,2,3\n"))
107
+ assert_equal([nil], CsvParser.parse_line("\n1,2,3\n"))
108
+ end
109
+
110
+ def test_rob_edge_cases
111
+ [ [%Q{"a\nb"}, ["a\nb"]],
112
+ [%Q{"\n\n\n"}, ["\n\n\n"]],
113
+ [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]],
114
+ [%Q{,"\r\n"}, [nil,"\r\n"]],
115
+ [%Q{,"\r\n."}, [nil,"\r\n."]],
116
+ [%Q{"a\na","one newline"}, ["a\na", 'one newline']],
117
+ [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']],
118
+ [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']],
119
+ [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
120
+ [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
121
+ ].each do |edge_case|
122
+ assert_equal(edge_case.last, CsvParser.parse_line(edge_case.first))
123
+ end
124
+ end
125
+
126
+ end
@@ -0,0 +1,119 @@
1
+ #
2
+ # Tests copied from faster_csv by James Edward Gray II
3
+ #
4
+
5
+ require 'test/unit'
6
+ require 'fastest_csv'
7
+
8
+ class TestFastestCSVInterface < Test::Unit::TestCase
9
+
10
+ def setup
11
+ @path = File.join(File.dirname(__FILE__), "temp_test_data.csv")
12
+
13
+ File.open(@path, "w") do |file|
14
+ file << "1,2,3\r\n"
15
+ file << "4,5\r\n"
16
+ end
17
+
18
+ @expected = [%w{1 2 3}, %w{4 5}]
19
+ end
20
+
21
+ def teardown
22
+ File.unlink(@path)
23
+ end
24
+
25
+ ### Test Read Interface ###
26
+
27
+ def test_foreach
28
+ FastestCSV.foreach(@path) do |row|
29
+ assert_equal(@expected.shift, row)
30
+ end
31
+ end
32
+
33
+ def test_open_and_close
34
+ csv = FastestCSV.open(@path, "r+")
35
+ assert_not_nil(csv)
36
+ assert_instance_of(FastestCSV, csv)
37
+ assert_equal(false, csv.closed?)
38
+ csv.close
39
+ assert(csv.closed?)
40
+
41
+ ret = FastestCSV.open(@path) do |csv|
42
+ assert_instance_of(FastestCSV, csv)
43
+ "Return value."
44
+ end
45
+ assert(csv.closed?)
46
+ assert_equal("Return value.", ret)
47
+ end
48
+
49
+ def test_parse
50
+ data = File.read(@path)
51
+ assert_equal( @expected,
52
+ FastestCSV.parse(data) )
53
+
54
+ FastestCSV.parse(data) do |row|
55
+ assert_equal(@expected.shift, row)
56
+ end
57
+ end
58
+
59
+ #def test_parse_line
60
+ # row = FasterCSV.parse_line("1;2;3", :col_sep => ";")
61
+ # assert_not_nil(row)
62
+ # assert_instance_of(Array, row)
63
+ # assert_equal(%w{1 2 3}, row)
64
+ #
65
+ # # shortcut interface
66
+ # row = "1;2;3".parse_csv(:col_sep => ";")
67
+ # assert_not_nil(row)
68
+ # assert_instance_of(Array, row)
69
+ # assert_equal(%w{1 2 3}, row)
70
+ #end
71
+
72
+ def test_parse_line_with_empty_lines
73
+ assert_equal(nil, FastestCSV.parse_line("")) # to signal eof
74
+ #assert_equal(Array.new, FastestCSV.parse_line("\n1,2,3"))
75
+ assert_equal([nil], FastestCSV.parse_line("\n1,2,3"))
76
+ end
77
+
78
+ def test_read_and_readlines
79
+ assert_equal( @expected,
80
+ FastestCSV.read(@path) )
81
+ assert_equal( @expected,
82
+ FastestCSV.readlines(@path))
83
+
84
+
85
+ data = FastestCSV.open(@path) do |csv|
86
+ csv.read
87
+ end
88
+ assert_equal(@expected, data)
89
+ data = FastestCSV.open(@path) do |csv|
90
+ csv.readlines
91
+ end
92
+ assert_equal(@expected, data)
93
+ end
94
+
95
+ #def test_table
96
+ # table = FastestCSV.table(@path)
97
+ # assert_instance_of(FastestCSV::Table, table)
98
+ # assert_equal([[:"1", :"2", :"3"], [4, 5, nil]], table.to_a)
99
+ #end
100
+
101
+ def test_shift # aliased as gets() and readline()
102
+ FastestCSV.open(@path, "r+") do |csv|
103
+ assert_equal(@expected.shift, csv.shift)
104
+ assert_equal(@expected.shift, csv.shift)
105
+ assert_equal(nil, csv.shift)
106
+ end
107
+ end
108
+
109
+ def test_long_line # ruby's regex parser may have problems with long rows
110
+ File.unlink(@path)
111
+
112
+ long_field_length = 2800
113
+ File.open(@path, "w") do |file|
114
+ file << "1,2,#{'3' * long_field_length}\r\n"
115
+ end
116
+ @expected = [%w{1 2} + ['3' * long_field_length]]
117
+ test_shift
118
+ end
119
+ end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fastest-csv
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.2
6
+ platform: java
7
+ authors:
8
+ - Maarten Oelering
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2013-01-13 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake-compiler
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :development
25
+ version_requirements: *id001
26
+ description: Fastest standard CSV parser for MRI Ruby and JRuby
27
+ email:
28
+ - maarten@brightcode.nl
29
+ executables: []
30
+
31
+ extensions: []
32
+
33
+ extra_rdoc_files: []
34
+
35
+ files:
36
+ - .gitignore
37
+ - Gemfile
38
+ - LICENSE
39
+ - README.md
40
+ - Rakefile
41
+ - ext/csv_parser/CsvParserService.java
42
+ - ext/csv_parser/extconf.rb
43
+ - ext/csv_parser/parser.c
44
+ - fastest-csv.gemspec
45
+ - lib/fastest-csv.rb
46
+ - lib/fastest-csv/version.rb
47
+ - lib/fastest_csv.rb
48
+ - test/tc_csv_parsing.rb
49
+ - test/tc_interface.rb
50
+ - lib/csv_parser.jar
51
+ homepage: https://github.com/brightcode/fastest-csv
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options: []
56
+
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ requirements: []
72
+
73
+ rubyforge_project:
74
+ rubygems_version: 1.8.24
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: Fastest standard CSV parser for MRI Ruby and JRuby
78
+ test_files:
79
+ - test/tc_csv_parsing.rb
80
+ - test/tc_interface.rb