fastest-csv 0.0.2-java → 0.0.4-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ //
2
+ // Copyright (c) Maarten Oelering, BrightCode BV
3
+ //
4
+
5
+ package org.brightcode;
6
+
7
+ import java.util.ArrayList;
8
+ import java.util.List;
9
+
10
+ public class CsvParser {
11
+
12
+ private static int DEF_ARRAY_LEN = 32;
13
+
14
+ private static int UNQUOTED = 0;
15
+ private static int IN_QUOTED = 1;
16
+ private static int QUOTE_IN_QUOTED = 2;
17
+
18
+ public static List parseLine(String line) {
19
+ int length = line.length();
20
+ if (length == 0)
21
+ return null;
22
+
23
+ int state = UNQUOTED;
24
+ StringBuilder value = new StringBuilder(length); // field value, no longer than line
25
+ List<String> array = new ArrayList<String>(DEF_ARRAY_LEN);
26
+
27
+ for (int i = 0; i < length; i++) {
28
+ char c = line.charAt(i);
29
+ switch (c) {
30
+ case ',':
31
+ if (state == UNQUOTED) {
32
+ if (value.length() == 0) {
33
+ array.add(null);
34
+ }
35
+ else {
36
+ array.add(value.toString());
37
+ value.setLength(0);
38
+ }
39
+ }
40
+ else if (state == IN_QUOTED) {
41
+ value.append(c);
42
+ }
43
+ else if (state == 2) {
44
+ array.add(value.toString());
45
+ value.setLength(0);
46
+ state = UNQUOTED;
47
+ }
48
+ break;
49
+ case '"':
50
+ if (state == UNQUOTED) {
51
+ state = IN_QUOTED;
52
+ }
53
+ else if (state == IN_QUOTED) {
54
+ state = QUOTE_IN_QUOTED;
55
+ }
56
+ else if (state == QUOTE_IN_QUOTED) {
57
+ value.append(c); // escaped quote
58
+ state = IN_QUOTED;
59
+ }
60
+ break;
61
+ case '\r':
62
+ case '\n':
63
+ if (state == IN_QUOTED) {
64
+ value.append(c);
65
+ }
66
+ else {
67
+ i = length; // only parse first line if multiline
68
+ }
69
+ break;
70
+ default:
71
+ value.append(c);
72
+ break;
73
+ }
74
+ }
75
+ if (state == UNQUOTED) {
76
+ if (value.length() == 0) {
77
+ array.add(null);
78
+ }
79
+ else {
80
+ array.add(value.toString());
81
+ value.setLength(0);
82
+ }
83
+ }
84
+ else if (state == QUOTE_IN_QUOTED) {
85
+ array.add(value.toString());
86
+ }
87
+ return array;
88
+ }
89
+ }
@@ -19,6 +19,8 @@ public class CsvParserService implements BasicLibraryService {
19
19
 
20
20
  private Ruby runtime;
21
21
 
22
+ private static int DEF_ARRAY_LEN = 32;
23
+
22
24
  private static int UNQUOTED = 0;
23
25
  private static int IN_QUOTED = 1;
24
26
  private static int QUOTE_IN_QUOTED = 2;
@@ -29,6 +31,7 @@ public class CsvParserService implements BasicLibraryService {
29
31
  this.runtime = runtime;
30
32
 
31
33
  RubyModule mCsvParser = runtime.defineModule("CsvParser");
34
+ // TODO: CallbackFactory#getSingletonMethod is deprecated
32
35
  CallbackFactory callbackFactory = runtime.callbackFactory(CsvParserService.class);
33
36
  mCsvParser.defineModuleFunction("parse_line",
34
37
  callbackFactory.getSingletonMethod("parseLine", RubyString.class));
@@ -45,7 +48,7 @@ public class CsvParserService implements BasicLibraryService {
45
48
 
46
49
  int state = UNQUOTED;
47
50
  StringBuilder value = new StringBuilder(length); // field value, no longer than line
48
- RubyArray array = RubyArray.newArray(runtime, 36);
51
+ RubyArray array = RubyArray.newArray(runtime, DEF_ARRAY_LEN);
49
52
 
50
53
  for (int i = 0; i < length; i++) {
51
54
  char c = seq.charAt(i);
@@ -9,16 +9,12 @@
9
9
  #include "ruby/io.h"
10
10
  #endif
11
11
 
12
- /* default allocated size is 16 */
13
12
  #define DEF_ARRAY_LEN 32
14
13
 
15
14
  #define UNQUOTED 0
16
15
  #define IN_QUOTED 1
17
16
  #define QUOTE_IN_QUOTED 2
18
17
 
19
- /*
20
- static VALUE cFastestCSV;
21
- */
22
18
  static VALUE mCsvParser;
23
19
 
24
20
  static VALUE parse_line(VALUE self, VALUE str)
@@ -32,7 +28,7 @@ static VALUE parse_line(VALUE self, VALUE str)
32
28
  if (len == 0)
33
29
  return Qnil;
34
30
 
35
- VALUE array = rb_ary_new2(DEF_ARRAY_LEN);
31
+ VALUE array = rb_ary_new2(DEF_ARRAY_LEN); /* default allocated size is 16 */
36
32
  char value[len]; /* field value, no longer than line */
37
33
  int state = 0;
38
34
  int index = 0;
@@ -94,10 +90,6 @@ static VALUE parse_line(VALUE self, VALUE str)
94
90
 
95
91
  void Init_csv_parser()
96
92
  {
97
- /*
98
- cFastestCSV = rb_define_class("FastestCSV", rb_cObject);
99
- rb_define_singleton_method(cFastestCSV, "parse_line", parse_line, 1);
100
- */
101
93
  mCsvParser = rb_define_module("CsvParser");
102
94
  rb_define_module_function(mCsvParser, "parse_line", parse_line, 1);
103
95
  }
@@ -1,3 +1,3 @@
1
1
  class FastestCSV
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/fastest_csv.rb CHANGED
@@ -5,10 +5,15 @@ require 'stringio'
5
5
 
6
6
  # Fast CSV parser using native code
7
7
  class FastestCSV
8
+ include Enumerable
8
9
 
9
10
  if RUBY_PLATFORM =~ /java/
10
- require 'jruby'
11
- org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
11
+ if JRUBY_VERSION =~ /^1\.[0-6]/
12
+ require 'jruby'
13
+ org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
14
+ else
15
+ include_package "org.brightcode"
16
+ end
12
17
  end
13
18
 
14
19
  # Pass each line of the specified +path+ as array to the provided +block+
@@ -58,7 +63,7 @@ class FastestCSV
58
63
  end
59
64
 
60
65
  def self.parse_line(line)
61
- ::CsvParser.parse_line(line)
66
+ CsvParser.parse_line(line)
62
67
  end
63
68
 
64
69
  # Create new FastestCSV wrapping the specified IO object
@@ -68,8 +73,12 @@ class FastestCSV
68
73
 
69
74
  # Read from the wrapped IO passing each line as array to the specified block
70
75
  def each
71
- while row = shift
72
- yield row
76
+ if block_given?
77
+ while row = shift
78
+ yield row
79
+ end
80
+ else
81
+ to_enum # return enumerator
73
82
  end
74
83
  end
75
84
 
@@ -80,11 +89,16 @@ class FastestCSV
80
89
  table
81
90
  end
82
91
  alias_method :readlines, :read
92
+
93
+ # Rewind the underlying IO object and reset line counter
94
+ def rewind
95
+ @io.rewind
96
+ end
83
97
 
84
98
  # Read next line from the wrapped IO and return as array or nil at EOF
85
99
  def shift
86
100
  if line = @io.gets
87
- ::CsvParser.parse_line(line)
101
+ CsvParser.parse_line(line)
88
102
  else
89
103
  nil
90
104
  end
@@ -105,7 +119,7 @@ end
105
119
  class String
106
120
  # Equivalent to <tt>FastestCSV::parse_line(self)</tt>
107
121
  def parse_csv
108
- ::CsvParser.parse_line(self)
122
+ CsvParser.parse_line(self)
109
123
  end
110
124
  end
111
125
 
@@ -13,6 +13,10 @@ require 'fastest_csv'
13
13
  #
14
14
  class TestCSVParsing < Test::Unit::TestCase
15
15
 
16
+ if RUBY_PLATFORM =~ /java/
17
+ include_package "org.brightcode"
18
+ end
19
+
16
20
  def test_mastering_regex_example
17
21
  ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
18
22
  assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
data/test/tc_interface.rb CHANGED
@@ -116,4 +116,13 @@ class TestFastestCSVInterface < Test::Unit::TestCase
116
116
  @expected = [%w{1 2} + ['3' * long_field_length]]
117
117
  test_shift
118
118
  end
119
+
120
+ def test_enumerable
121
+ FastestCSV.open(@path) do |csv|
122
+ assert(csv.include?(["1", "2", "3"]))
123
+ csv.rewind
124
+ assert_equal([["1", "2", "3"], ["4", "5"]], csv.to_a)
125
+ end
126
+ end
127
+
119
128
  end
metadata CHANGED
@@ -1,80 +1,79 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: fastest-csv
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.0.2
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.4
6
6
  platform: java
7
- authors:
8
- - Maarten Oelering
9
- autorequire:
7
+ authors:
8
+ - Maarten Oelering
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
-
13
- date: 2013-01-13 00:00:00 Z
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
16
- name: rake-compiler
17
- prerelease: false
18
- requirement: &id001 !ruby/object:Gem::Requirement
19
- none: false
20
- requirements:
21
- - - ">="
22
- - !ruby/object:Gem::Version
23
- version: "0"
24
- type: :development
25
- version_requirements: *id001
12
+ date: 2013-08-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake-compiler
16
+ version_requirements: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - '>='
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ none: false
22
+ requirement: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ none: false
28
+ prerelease: false
29
+ type: :development
26
30
  description: Fastest standard CSV parser for MRI Ruby and JRuby
27
- email:
28
- - maarten@brightcode.nl
31
+ email:
32
+ - maarten@brightcode.nl
29
33
  executables: []
30
-
31
34
  extensions: []
32
-
33
35
  extra_rdoc_files: []
34
-
35
- files:
36
- - .gitignore
37
- - Gemfile
38
- - LICENSE
39
- - README.md
40
- - Rakefile
41
- - ext/csv_parser/CsvParserService.java
42
- - ext/csv_parser/extconf.rb
43
- - ext/csv_parser/parser.c
44
- - fastest-csv.gemspec
45
- - lib/fastest-csv.rb
46
- - lib/fastest-csv/version.rb
47
- - lib/fastest_csv.rb
48
- - test/tc_csv_parsing.rb
49
- - test/tc_interface.rb
50
- - lib/csv_parser.jar
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - LICENSE
40
+ - README.md
41
+ - Rakefile
42
+ - ext/csv_parser/CsvParser.java
43
+ - ext/csv_parser/CsvParserService.java
44
+ - ext/csv_parser/extconf.rb
45
+ - ext/csv_parser/parser.c
46
+ - fastest-csv.gemspec
47
+ - lib/fastest-csv.rb
48
+ - lib/fastest-csv/version.rb
49
+ - lib/fastest_csv.rb
50
+ - test/tc_csv_parsing.rb
51
+ - test/tc_interface.rb
52
+ - lib/csv_parser.jar
51
53
  homepage: https://github.com/brightcode/fastest-csv
52
54
  licenses: []
53
-
54
- post_install_message:
55
+ post_install_message:
55
56
  rdoc_options: []
56
-
57
- require_paths:
58
- - lib
59
- required_ruby_version: !ruby/object:Gem::Requirement
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
60
64
  none: false
61
- requirements:
62
- - - ">="
63
- - !ruby/object:Gem::Version
64
- version: "0"
65
- required_rubygems_version: !ruby/object:Gem::Requirement
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
66
70
  none: false
67
- requirements:
68
- - - ">="
69
- - !ruby/object:Gem::Version
70
- version: "0"
71
71
  requirements: []
72
-
73
- rubyforge_project:
72
+ rubyforge_project:
74
73
  rubygems_version: 1.8.24
75
- signing_key:
74
+ signing_key:
76
75
  specification_version: 3
77
76
  summary: Fastest standard CSV parser for MRI Ruby and JRuby
78
- test_files:
79
- - test/tc_csv_parsing.rb
80
- - test/tc_interface.rb
77
+ test_files:
78
+ - test/tc_csv_parsing.rb
79
+ - test/tc_interface.rb