fastest-csv 0.0.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ .DS_Store
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
19
+ lib/*.bundle
20
+ lib/*.jar
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fastest-csv.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Maarten Oelering
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,68 @@
1
+ # FastestCSV
2
+
3
+ Fastest CSV class for MRI Ruby and JRuby. Faster than faster_csv and fasterer-csv.
4
+
5
+ Uses native C code to parse CSV lines in MRI Ruby and Java in JRuby.
6
+
7
+ Supports standard CSV according to RFC4180. Not the so-called "csv" from Excel.
8
+
9
+ The interface is a subset of the CSV interface in Ruby 1.9.3. The options parameter is not supported.
10
+
11
+ Originally developed to parse large CSV log files from PowerMTA.
12
+
13
+ ## Installation
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ gem 'fastest-csv'
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install fastest-csv
26
+
27
+ ## Usage
28
+
29
+ Parse single line
30
+
31
+ FastestCSV.parse_line("one,two,three")
32
+ => ["one", "two", "three"]
33
+
34
+ "one,two,three".parse_csv
35
+ => ["one", "two", "three"]
36
+
37
+ Parse file without header
38
+
39
+ FastestCSV.foreach("path/to/file.csv") do |row|
40
+ row.each do |value|
41
+ #
42
+ end
43
+ end
44
+
45
+ Parse file with header
46
+
47
+ FastestCSV.open("path/to/file.csv") do |csv|
48
+ fields = csv.shift
49
+ while values = csv.shift
50
+ #
51
+ end
52
+ end
53
+
54
+ Parse file in array of arrays
55
+
56
+ rows = FastestCSV.read("path/to/file.csv")
57
+
58
+ Parse string in array of arrays
59
+
60
+ rows = FastestCSV.parse(csv_data)
61
+
62
+ ## Contributing
63
+
64
+ 1. Fork it
65
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
66
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
67
+ 4. Push to the branch (`git push origin my-new-feature`)
68
+ 5. Create new Pull Request
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ spec = Gem::Specification.load('fastest-csv.gemspec')
5
+
6
+ if RUBY_PLATFORM =~ /java/
7
+ require 'rake/javaextensiontask'
8
+ Rake::JavaExtensionTask.new('csv_parser', spec)
9
+ else
10
+ require 'rake/extensiontask'
11
+ Rake::ExtensionTask.new('csv_parser', spec)
12
+ end
13
+
14
+ require 'rake/testtask'
15
+ Rake::TestTask.new do |t|
16
+ t.libs << "test"
17
+ t.test_files = FileList['test/tc_*.rb']
18
+ #test.libs << 'lib' << 'test'
19
+ #test.pattern = 'test/**/test_*.rb'
20
+ #test.verbose = true
21
+ end
22
+
@@ -0,0 +1,112 @@
1
+ //
2
+ // Copyright (c) Maarten Oelering, BrightCode BV
3
+ //
4
+
5
+ package org.brightcode;
6
+
7
+ import java.io.IOException;
8
+
9
+ import org.jruby.Ruby;
10
+ import org.jruby.RubyArray;
11
+ import org.jruby.RubyModule;
12
+ import org.jruby.RubyString;
13
+ import org.jruby.runtime.Block;
14
+ import org.jruby.runtime.CallbackFactory;
15
+ import org.jruby.runtime.builtin.IRubyObject;
16
+ import org.jruby.runtime.load.BasicLibraryService;
17
+
18
+ public class CsvParserService implements BasicLibraryService {
19
+
20
+ private Ruby runtime;
21
+
22
+ private static int UNQUOTED = 0;
23
+ private static int IN_QUOTED = 1;
24
+ private static int QUOTE_IN_QUOTED = 2;
25
+
26
+ // Initial setup function. Takes a reference to the current JRuby runtime and
27
+ // sets up our modules.
28
+ public boolean basicLoad(Ruby runtime) throws IOException {
29
+ this.runtime = runtime;
30
+
31
+ RubyModule mCsvParser = runtime.defineModule("CsvParser");
32
+ CallbackFactory callbackFactory = runtime.callbackFactory(CsvParserService.class);
33
+ mCsvParser.defineModuleFunction("parse_line",
34
+ callbackFactory.getSingletonMethod("parseLine", RubyString.class));
35
+ return true;
36
+ }
37
+
38
+ public static IRubyObject parseLine(IRubyObject recv, RubyString line, Block unusedBlock) {
39
+ Ruby runtime = recv.getRuntime();
40
+
41
+ CharSequence seq = line.getValue();
42
+ int length = seq.length();
43
+ if (length == 0)
44
+ return runtime.getNil();
45
+
46
+ int state = UNQUOTED;
47
+ StringBuilder value = new StringBuilder(length); // field value, no longer than line
48
+ RubyArray array = RubyArray.newArray(runtime, 36);
49
+
50
+ for (int i = 0; i < length; i++) {
51
+ char c = seq.charAt(i);
52
+ switch (c) {
53
+ case ',':
54
+ if (state == UNQUOTED) {
55
+ if (value.length() == 0) {
56
+ array.append(runtime.getNil());
57
+ }
58
+ else {
59
+ array.append(RubyString.newString(runtime, value));
60
+ value.setLength(0);
61
+ }
62
+ }
63
+ else if (state == IN_QUOTED) {
64
+ value.append(c);
65
+ }
66
+ else if (state == 2) {
67
+ array.append(RubyString.newString(runtime, value));
68
+ value.setLength(0);
69
+ state = UNQUOTED;
70
+ }
71
+ break;
72
+ case '"':
73
+ if (state == UNQUOTED) {
74
+ state = IN_QUOTED;
75
+ }
76
+ else if (state == IN_QUOTED) {
77
+ state = QUOTE_IN_QUOTED;
78
+ }
79
+ else if (state == QUOTE_IN_QUOTED) {
80
+ value.append(c); // escaped quote
81
+ state = IN_QUOTED;
82
+ }
83
+ break;
84
+ case '\r':
85
+ case '\n':
86
+ if (state == IN_QUOTED) {
87
+ value.append(c);
88
+ }
89
+ else {
90
+ i = length; // only parse first line if multiline
91
+ }
92
+ break;
93
+ default:
94
+ value.append(c);
95
+ break;
96
+ }
97
+ }
98
+ if (state == UNQUOTED) {
99
+ if (value.length() == 0) {
100
+ array.append(runtime.getNil());
101
+ }
102
+ else {
103
+ array.append(RubyString.newString(runtime, value));
104
+ value.setLength(0);
105
+ }
106
+ }
107
+ else if (state == QUOTE_IN_QUOTED) {
108
+ array.append(RubyString.newString(runtime, value));
109
+ }
110
+ return array;
111
+ }
112
+ }
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'mkmf'
4
+ extension_name = 'csv_parser'
5
+ #dir_config(extension_name)
6
+
7
+ if RUBY_VERSION =~ /1.8/ then
8
+ $CPPFLAGS += " -DRUBY_18"
9
+ end
10
+
11
+ #if CONFIG["arch"] =~ /mswin32|mingw/
12
+ # $CFLAGS += " -march=i686"
13
+ #end
14
+
15
+ create_makefile(extension_name)
@@ -0,0 +1,103 @@
1
+ /*
2
+ * Copyright (c) Maarten Oelering, BrightCode BV
3
+ */
4
+
5
+ #include "ruby.h"
6
+ #ifdef RUBY_18
7
+ #include "rubyio.h"
8
+ #else
9
+ #include "ruby/io.h"
10
+ #endif
11
+
12
+ /* default allocated size is 16 */
13
+ #define DEF_ARRAY_LEN 32
14
+
15
+ #define UNQUOTED 0
16
+ #define IN_QUOTED 1
17
+ #define QUOTE_IN_QUOTED 2
18
+
19
+ /*
20
+ static VALUE cFastestCSV;
21
+ */
22
+ static VALUE mCsvParser;
23
+ /* CsvParser.parse_line(str): parse the first line of str into an array of fields (empty unquoted field => nil); nil for nil or "". */
24
+ static VALUE parse_line(VALUE self, VALUE str)
25
+ {
26
+ if (NIL_P(str))
27
+ return Qnil;
28
+ Check_Type(str, T_STRING); /* raise TypeError rather than read a non-String through RSTRING_PTR */
29
+ const char *ptr = RSTRING_PTR(str);
30
+ int len = (int) RSTRING_LEN(str); /* cast to prevent warning in 64-bit OS */
31
+
32
+ if (len == 0)
33
+ return Qnil;
34
+
35
+ VALUE array = rb_ary_new2(DEF_ARRAY_LEN);
36
+ char value[len]; /* field value, no longer than line; NOTE(review): stack VLA, very long lines may exhaust the stack */
37
+ int state = UNQUOTED;
38
+ int index = 0; /* number of bytes currently held in value */
39
+ int i;
40
+ char c;
41
+ for (i = 0; i < len; i++)
42
+ {
43
+ c = ptr[i];
44
+ switch (c)
45
+ {
46
+ case ',':
47
+ if (state == UNQUOTED) {
48
+ rb_ary_push(array, (index == 0 ? Qnil: rb_str_new(value, index)));
49
+ index = 0;
50
+ }
51
+ else if (state == IN_QUOTED) {
52
+ value[index++] = c; /* comma is data inside quotes */
53
+ }
54
+ else if (state == QUOTE_IN_QUOTED) {
55
+ rb_ary_push(array, rb_str_new(value, index)); /* quoted field ends, may be "" */
56
+ index = 0;
57
+ state = UNQUOTED;
58
+ }
59
+ break;
60
+ case '"':
61
+ if (state == UNQUOTED) {
62
+ state = IN_QUOTED;
63
+ }
64
+ else if (state == IN_QUOTED) {
65
+ state = QUOTE_IN_QUOTED;
66
+ }
67
+ else if (state == QUOTE_IN_QUOTED) {
68
+ value[index++] = c; /* escaped quote */
69
+ state = IN_QUOTED;
70
+ }
71
+ break;
72
+ case 13: /* \r */
73
+ case 10: /* \n */
74
+ if (state == IN_QUOTED) {
75
+ value[index++] = c; /* line break is data inside quotes */
76
+ }
77
+ else {
78
+ i = len; /* only parse first line if multiline */
79
+ }
80
+ break;
81
+ default:
82
+ value[index++] = c;
83
+ }
84
+ }
85
+
86
+ if (state == UNQUOTED) {
87
+ rb_ary_push(array, (index == 0 ? Qnil: rb_str_new(value, index)));
88
+ }
89
+ else if (state == QUOTE_IN_QUOTED) {
90
+ rb_ary_push(array, rb_str_new(value, index)); /* last field was quoted and properly closed */
91
+ }
92
+ return array;
93
+ }
94
+ /* Extension init function: defines module CsvParser and registers parse_line as a one-argument module function. */
95
+ void Init_csv_parser()
96
+ {
97
+ /*
98
+ cFastestCSV = rb_define_class("FastestCSV", rb_cObject);
99
+ rb_define_singleton_method(cFastestCSV, "parse_line", parse_line, 1);
100
+ */
101
+ mCsvParser = rb_define_module("CsvParser");
102
+ rb_define_module_function(mCsvParser, "parse_line", parse_line, 1);
103
+ }
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/fastest-csv/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Maarten Oelering"]
6
+ gem.email = ["maarten@brightcode.nl"]
7
+ gem.description = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
8
+ gem.summary = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
9
+ gem.homepage = "https://github.com/brightcode/fastest-csv"
10
+
11
+ gem.files = `git ls-files`.split($\)
12
+ #gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
+ gem.name = "fastest-csv"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = FastestCSV::VERSION
17
+
18
+ if RUBY_PLATFORM =~ /java/
19
+ gem.platform = "java"
20
+ gem.files << "lib/csv_parser.jar"
21
+ else
22
+ gem.extensions = ['ext/csv_parser/extconf.rb']
23
+ end
24
+
25
+ gem.add_development_dependency "rake-compiler"
26
+ end
@@ -0,0 +1 @@
1
+ require 'fastest_csv'
@@ -0,0 +1,3 @@
1
+ class FastestCSV
2
+ VERSION = "0.0.2"
3
+ end
@@ -0,0 +1,111 @@
1
+ # This loads either csv_parser.so, csv_parser.bundle or
2
+ # csv_parser.jar, depending on your Ruby platform and OS
3
+ require 'csv_parser'
4
+ require 'stringio'
5
+
6
+ # Fast CSV parser using native code
7
+ class FastestCSV
8
+
9
+ if RUBY_PLATFORM =~ /java/
10
+ require 'jruby'
11
+ org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
12
+ end
13
+
14
+ # Pass each line of the specified +path+ as array to the provided +block+
15
+ def self.foreach(path, &block)
16
+ open(path) do |reader|
17
+ reader.each(&block)
18
+ end
19
+ end
20
+
21
+ # Opens a csv file. Pass a FastestCSV instance to the provided block,
22
+ # or return it when no block is provided
23
+ def self.open(path, mode = "rb")
24
+ csv = new(File.open(path, mode))
25
+ if block_given?
26
+ begin
27
+ yield csv
28
+ ensure
29
+ csv.close
30
+ end
31
+ else
32
+ csv
33
+ end
34
+ end
35
+
36
+ # Read all lines from the specified +path+ into an array of arrays
37
+ def self.read(path)
38
+ open(path, "rb") { |csv| csv.read }
39
+ end
40
+
41
+ # Alias for FastestCSV.read
42
+ def self.readlines(path)
43
+ read(path)
44
+ end
45
+
46
+ # Read all lines from the specified String into an array of arrays
47
+ def self.parse(data, &block)
48
+ csv = new(StringIO.new(data))
49
+ if block.nil?
50
+ begin
51
+ csv.read
52
+ ensure
53
+ csv.close
54
+ end
55
+ else
56
+ csv.each(&block)
57
+ end
58
+ end
59
+
60
+ def self.parse_line(line)
61
+ ::CsvParser.parse_line(line)
62
+ end
63
+
64
+ # Create new FastestCSV wrapping the specified IO object
65
+ def initialize(io)
66
+ @io = io
67
+ end
68
+
69
+ # Read from the wrapped IO passing each line as array to the specified block
70
+ def each
71
+ while row = shift
72
+ yield row
73
+ end
74
+ end
75
+
76
+ # Read all remaining lines from the wrapped IO into an array of arrays
77
+ def read
78
+ table = Array.new
79
+ each {|row| table << row}
80
+ table
81
+ end
82
+ alias_method :readlines, :read
83
+
84
+ # Read next line from the wrapped IO and return as array or nil at EOF
85
+ def shift
86
+ if line = @io.gets
87
+ ::CsvParser.parse_line(line)
88
+ else
89
+ nil
90
+ end
91
+ end
92
+ alias_method :gets, :shift
93
+ alias_method :readline, :shift
94
+
95
+ # Close the wrapped IO
96
+ def close
97
+ @io.close
98
+ end
99
+
100
+ def closed?
101
+ @io.closed?
102
+ end
103
+ end
104
+
105
+ class String
106
+ # Equivalent to <tt>FastestCSV.parse_line(self)</tt>
107
+ def parse_csv
108
+ ::CsvParser.parse_line(self)
109
+ end
110
+ end
111
+
@@ -0,0 +1,126 @@
1
+ #
2
+ # Tests copied from faster_csv by James Edward Gray II
3
+ #
4
+
5
+ require 'test/unit'
6
+ require 'fastest_csv'
7
+
8
+ #
9
+ # Following tests are my interpretation of the
10
+ # {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that
11
+ # document in one place (intentionally) and that is to make the default row
12
+ # separator <tt>$/</tt>.
13
+ #
14
+ class TestCSVParsing < Test::Unit::TestCase
15
+
16
+ def test_mastering_regex_example
17
+ ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
18
+ assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
19
+ "It's \"10 Grand\", baby", "10K" ],
20
+ CsvParser.parse_line(ex) )
21
+ end
22
+
23
+ # Pulled from: http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/ruby/test/csv/test_csv.rb?rev=1.12.2.2;content-type=text%2Fplain
24
+ def test_std_lib_csv
25
+ [ ["\t", ["\t"]],
26
+ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
27
+ ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
28
+ ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
29
+ ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
30
+ ["\"\"", [""]],
31
+ ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
32
+ ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
33
+ ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
34
+ ["foo,\"\",baz", ["foo", "", "baz"]],
35
+ ["\",\"", [","]],
36
+ ["foo", ["foo"]],
37
+ [",,", [nil, nil, nil]],
38
+ [",", [nil, nil]],
39
+ ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
40
+ ["foo,,baz", ["foo", nil, "baz"]],
41
+ ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
42
+ ["\",\",\",\"", [",", ","]],
43
+ ["foo,bar,", ["foo", "bar", nil]],
44
+ [",foo,bar", [nil, "foo", "bar"]],
45
+ ["foo,bar", ["foo", "bar"]],
46
+ [";", [";"]],
47
+ ["\t,\t", ["\t", "\t"]],
48
+ ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
49
+ ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
50
+ ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
51
+ [";,;", [";", ";"]] ].each do |csv_test|
52
+ assert_equal(csv_test.last, CsvParser.parse_line(csv_test.first))
53
+ end
54
+
55
+ [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
56
+ ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
57
+ ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
58
+ ["\"\"", [""]],
59
+ ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
60
+ ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
61
+ ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
62
+ ["foo,\"\",baz", ["foo", "", "baz"]],
63
+ ["foo", ["foo"]],
64
+ [",,", [nil, nil, nil]],
65
+ [",", [nil, nil]],
66
+ ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
67
+ ["foo,,baz", ["foo", nil, "baz"]],
68
+ ["foo,bar", ["foo", "bar"]],
69
+ ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
70
+ ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test|
71
+ assert_equal(csv_test.last, CsvParser.parse_line(csv_test.first))
72
+ end
73
+ end
74
+
75
+ # From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
76
+ def test_aras_edge_cases
77
+ [ [%Q{a,b}, ["a", "b"]],
78
+ [%Q{a,"""b"""}, ["a", "\"b\""]],
79
+ [%Q{a,"""b"}, ["a", "\"b"]],
80
+ [%Q{a,"b"""}, ["a", "b\""]],
81
+ [%Q{a,"\nb"""}, ["a", "\nb\""]],
82
+ [%Q{a,"""\nb"}, ["a", "\"\nb"]],
83
+ [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
84
+ [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
85
+ [%Q{a,,,}, ["a", nil, nil, nil]],
86
+ [%Q{,}, [nil, nil]],
87
+ [%Q{"",""}, ["", ""]],
88
+ [%Q{""""}, ["\""]],
89
+ [%Q{"""",""}, ["\"",""]],
90
+ [%Q{,""}, [nil,""]],
91
+ [%Q{,"\r"}, [nil,"\r"]],
92
+ [%Q{"\r\n,"}, ["\r\n,"]],
93
+ [%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case|
94
+ assert_equal(edge_case.last, CsvParser.parse_line(edge_case.first))
95
+ end
96
+ end
97
+
98
+ def test_james_edge_cases
99
+ # A read at eof? should return nil.
100
+ assert_equal(nil, CsvParser.parse_line(""))
101
+ #
102
+ # With CSV it's impossible to tell an empty line from a line containing a
103
+ # single +nil+ field. The standard CSV library returns <tt>[nil]</tt>
104
+ # in these cases, but <tt>Array.new</tt> makes more sense to me.
105
+ #
106
+ #assert_equal(Array.new, FastestCSV.parse_line("\n1,2,3\n"))
107
+ assert_equal([nil], CsvParser.parse_line("\n1,2,3\n"))
108
+ end
109
+
110
+ def test_rob_edge_cases
111
+ [ [%Q{"a\nb"}, ["a\nb"]],
112
+ [%Q{"\n\n\n"}, ["\n\n\n"]],
113
+ [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]],
114
+ [%Q{,"\r\n"}, [nil,"\r\n"]],
115
+ [%Q{,"\r\n."}, [nil,"\r\n."]],
116
+ [%Q{"a\na","one newline"}, ["a\na", 'one newline']],
117
+ [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']],
118
+ [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']],
119
+ [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
120
+ [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
121
+ ].each do |edge_case|
122
+ assert_equal(edge_case.last, CsvParser.parse_line(edge_case.first))
123
+ end
124
+ end
125
+
126
+ end
@@ -0,0 +1,119 @@
1
+ #
2
+ # Tests copied from faster_csv by James Edward Gray II
3
+ #
4
+
5
+ require 'test/unit'
6
+ require 'fastest_csv'
7
+
8
+ class TestFastestCSVInterface < Test::Unit::TestCase
9
+
10
+ def setup
11
+ @path = File.join(File.dirname(__FILE__), "temp_test_data.csv")
12
+
13
+ File.open(@path, "w") do |file|
14
+ file << "1,2,3\r\n"
15
+ file << "4,5\r\n"
16
+ end
17
+
18
+ @expected = [%w{1 2 3}, %w{4 5}]
19
+ end
20
+
21
+ def teardown
22
+ File.unlink(@path)
23
+ end
24
+
25
+ ### Test Read Interface ###
26
+
27
+ def test_foreach
28
+ FastestCSV.foreach(@path) do |row|
29
+ assert_equal(@expected.shift, row)
30
+ end
31
+ end
32
+
33
+ def test_open_and_close
34
+ csv = FastestCSV.open(@path, "r+")
35
+ assert_not_nil(csv)
36
+ assert_instance_of(FastestCSV, csv)
37
+ assert_equal(false, csv.closed?)
38
+ csv.close
39
+ assert(csv.closed?)
40
+
41
+ ret = FastestCSV.open(@path) do |csv|
42
+ assert_instance_of(FastestCSV, csv)
43
+ "Return value."
44
+ end
45
+ assert(csv.closed?)
46
+ assert_equal("Return value.", ret)
47
+ end
48
+
49
+ def test_parse
50
+ data = File.read(@path)
51
+ assert_equal( @expected,
52
+ FastestCSV.parse(data) )
53
+
54
+ FastestCSV.parse(data) do |row|
55
+ assert_equal(@expected.shift, row)
56
+ end
57
+ end
58
+
59
+ #def test_parse_line
60
+ # row = FasterCSV.parse_line("1;2;3", :col_sep => ";")
61
+ # assert_not_nil(row)
62
+ # assert_instance_of(Array, row)
63
+ # assert_equal(%w{1 2 3}, row)
64
+ #
65
+ # # shortcut interface
66
+ # row = "1;2;3".parse_csv(:col_sep => ";")
67
+ # assert_not_nil(row)
68
+ # assert_instance_of(Array, row)
69
+ # assert_equal(%w{1 2 3}, row)
70
+ #end
71
+
72
+ def test_parse_line_with_empty_lines
73
+ assert_equal(nil, FastestCSV.parse_line("")) # to signal eof
74
+ #assert_equal(Array.new, FastestCSV.parse_line("\n1,2,3"))
75
+ assert_equal([nil], FastestCSV.parse_line("\n1,2,3"))
76
+ end
77
+
78
+ def test_read_and_readlines
79
+ assert_equal( @expected,
80
+ FastestCSV.read(@path) )
81
+ assert_equal( @expected,
82
+ FastestCSV.readlines(@path))
83
+
84
+
85
+ data = FastestCSV.open(@path) do |csv|
86
+ csv.read
87
+ end
88
+ assert_equal(@expected, data)
89
+ data = FastestCSV.open(@path) do |csv|
90
+ csv.readlines
91
+ end
92
+ assert_equal(@expected, data)
93
+ end
94
+
95
+ #def test_table
96
+ # table = FastestCSV.table(@path)
97
+ # assert_instance_of(FastestCSV::Table, table)
98
+ # assert_equal([[:"1", :"2", :"3"], [4, 5, nil]], table.to_a)
99
+ #end
100
+
101
+ def test_shift # aliased as gets() and readline()
102
+ FastestCSV.open(@path, "r+") do |csv|
103
+ assert_equal(@expected.shift, csv.shift)
104
+ assert_equal(@expected.shift, csv.shift)
105
+ assert_equal(nil, csv.shift)
106
+ end
107
+ end
108
+
109
+ def test_long_line # ruby's regex parser may have problems with long rows
110
+ File.unlink(@path)
111
+
112
+ long_field_length = 2800
113
+ File.open(@path, "w") do |file|
114
+ file << "1,2,#{'3' * long_field_length}\r\n"
115
+ end
116
+ @expected = [%w{1 2} + ['3' * long_field_length]]
117
+ test_shift
118
+ end
119
+ end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fastest-csv
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.2
6
+ platform: java
7
+ authors:
8
+ - Maarten Oelering
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2013-01-13 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake-compiler
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :development
25
+ version_requirements: *id001
26
+ description: Fastest standard CSV parser for MRI Ruby and JRuby
27
+ email:
28
+ - maarten@brightcode.nl
29
+ executables: []
30
+
31
+ extensions: []
32
+
33
+ extra_rdoc_files: []
34
+
35
+ files:
36
+ - .gitignore
37
+ - Gemfile
38
+ - LICENSE
39
+ - README.md
40
+ - Rakefile
41
+ - ext/csv_parser/CsvParserService.java
42
+ - ext/csv_parser/extconf.rb
43
+ - ext/csv_parser/parser.c
44
+ - fastest-csv.gemspec
45
+ - lib/fastest-csv.rb
46
+ - lib/fastest-csv/version.rb
47
+ - lib/fastest_csv.rb
48
+ - test/tc_csv_parsing.rb
49
+ - test/tc_interface.rb
50
+ - lib/csv_parser.jar
51
+ homepage: https://github.com/brightcode/fastest-csv
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options: []
56
+
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ requirements: []
72
+
73
+ rubyforge_project:
74
+ rubygems_version: 1.8.24
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: Fastest standard CSV parser for MRI Ruby and JRuby
78
+ test_files:
79
+ - test/tc_csv_parsing.rb
80
+ - test/tc_interface.rb