csv_lazy 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
# Fetch gems over HTTPS rather than plain HTTP so that downloads are
# protected against tampering in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.8.0"
  gem "rdoc", "~> 3.12"
  gem "bundler", ">= 1.0.0"
  gem "jeweler", "~> 1.8.4"
end
data/Gemfile.lock ADDED
@@ -0,0 +1,31 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.8.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rdoc
11
+ json (1.7.4)
12
+ rake (0.9.2.2)
13
+ rdoc (3.12)
14
+ json (~> 1.4)
15
+ rspec (2.8.0)
16
+ rspec-core (~> 2.8.0)
17
+ rspec-expectations (~> 2.8.0)
18
+ rspec-mocks (~> 2.8.0)
19
+ rspec-core (2.8.0)
20
+ rspec-expectations (2.8.0)
21
+ diff-lcs (~> 1.1.2)
22
+ rspec-mocks (2.8.0)
23
+
24
+ PLATFORMS
25
+ ruby
26
+
27
+ DEPENDENCIES
28
+ bundler (>= 1.0.0)
29
+ jeweler (~> 1.8.4)
30
+ rdoc (~> 3.12)
31
+ rspec (~> 2.8.0)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Kasper Johansen
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = csv_lazy
2
+
3
+ A small CSV lib that skips whitespace-format-bugs and more.
4
+
5
+ == Contributing to csv_lazy
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
9
+ * Fork the project.
10
+ * Start a feature/bugfix branch.
11
+ * Commit and push until you are happy with your contribution.
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or changing them is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2012 Kasper Johansen. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
# encoding: utf-8

# Build script for the csv_lazy gem: packaging (jeweler), specs (rspec)
# and API documentation (rdoc).

require 'rubygems'
require 'bundler'
begin
  # Make the gems from the Gemfile's default and development groups loadable.
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "csv_lazy"
  gem.homepage = "http://github.com/kaspernj/csv_lazy"
  gem.license = "MIT"
  gem.summary = %Q{A small CSV lib that skips whitespace-format-bugs and more.}
  gem.description = %Q{A small CSV lib that skips whitespace-format-bugs and more.}
  gem.email = "k@spernj.org"
  gem.authors = ["Kasper Johansen"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# Same specs as :spec, with the rcov coverage option switched on.
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

task :default => :spec

require 'rdoc/task'
# RDoc::Task is the class defined by 'rdoc/task'; Rake::RDocTask is only a
# backward-compatibility alias for it.
RDoc::Task.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "csv_lazy #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
data/csv_lazy.gemspec ADDED
@@ -0,0 +1,61 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{csv_lazy}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Kasper Johansen"]
12
+ s.date = %q{2012-08-09}
13
+ s.description = %q{A small CSV lib that skips whitespace-format-bugs and more.}
14
+ s.email = %q{k@spernj.org}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "csv_lazy.gemspec",
29
+ "lib/csv_lazy.rb",
30
+ "spec/csv_lazy_spec.rb",
31
+ "spec/spec_helper.rb",
32
+ "spec/test1.csv.gz"
33
+ ]
34
+ s.homepage = %q{http://github.com/kaspernj/csv_lazy}
35
+ s.licenses = ["MIT"]
36
+ s.require_paths = ["lib"]
37
+ s.rubygems_version = %q{1.6.2}
38
+ s.summary = %q{A small CSV lib that skips whitespace-format-bugs and more.}
39
+
40
+ if s.respond_to? :specification_version then
41
+ s.specification_version = 3
42
+
43
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
44
+ s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
45
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
46
+ s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
47
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
48
+ else
49
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
50
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
51
+ s.add_dependency(%q<bundler>, [">= 1.0.0"])
52
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
53
+ end
54
+ else
55
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
56
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
57
+ s.add_dependency(%q<bundler>, [">= 1.0.0"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
59
+ end
60
+ end
61
+
data/lib/csv_lazy.rb ADDED
@@ -0,0 +1,160 @@
1
# A small CSV reader that pulls its input from an IO in chunks and yields one
# row (an Array of Strings) at a time. Whitespace between a closing quote and
# the row separator is tolerated.
#
# The underlying IO is consumed while iterating, so the rows can only be
# enumerated once.
#
#   Csv_lazy.new(:io => File.open("data.csv"), :col_sep => ",") do |row|
#     puts row.inspect
#   end
class Csv_lazy
  include Enumerable

  # Option keys understood by #initialize. Any other key raises.
  ACCEPTED_ARGS = [:quote_char, :row_sep, :col_sep, :io, :debug].freeze

  # Number of bytes requested from the IO per read. The buffer is also topped
  # up whenever it holds fewer bytes than this.
  BUFFER_SIZE = 4096

  # Creates a reader. If a block is given every row is yielded to it
  # immediately.
  #
  # +args+ is a Hash with these keys:
  # [:io]         Object responding to +read(length)+ (required).
  # [:quote_char] Quote character, default <tt>"</tt>.
  # [:row_sep]    Row separator, default <tt>"\n"</tt>.
  # [:col_sep]    Column separator, default <tt>;</tt>.
  # [:debug]      When true, parser progress is printed to stdout.
  #
  # Raises RuntimeError on unknown keys or missing/blank required values.
  def initialize(args, &blk)
    @args = {
      :quote_char => '"',
      :row_sep => "\n",
      :col_sep => ";"
    }.merge(args)

    @args.each_key do |key|
      raise "Unknown argument: '#{key}'." unless ACCEPTED_ARGS.include?(key)
    end

    raise "No ':quote_char' was given." if @args[:quote_char].to_s.strip.empty?
    raise "No ':col_sep' was given." if @args[:col_sep].to_s.strip.empty?
    raise "No ':row_sep' was given." if @args[:row_sep].to_s.empty?
    raise "No ':io' was given." if !@args[:io]

    @io = @args[:io]
    @eof = false
    @buffer = ""
    @debug = @args[:debug]

    quote_char = Regexp.escape(@args[:quote_char])
    col_sep = Regexp.escape(@args[:col_sep])
    row_sep = Regexp.escape(@args[:row_sep])

    @regex_begin_quote_char = /\A\s*#{quote_char}/

    @regex_row_end = /\A\s*?#{row_sep}/
    @regex_colsep_next = /\A#{col_sep}/

    # Multiline so that a quoted column may contain line breaks.
    @regex_read_until_quote_char = /\A(.*?)#{quote_char}/m
    # Deliberately NOT multiline: '.' stopping at "\n" keeps an unquoted
    # column from running into the next line.
    @regex_read_until_col_sep = /\A(.*?)#{col_sep}/
    @regex_read_until_row_sep = /\A(.+?)#{row_sep}/
    @regex_read_until_end = /\A(.+?)\Z/

    self.each(&blk) if blk
  end

  # Yields each row as an array. Without a block an Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?

    while (row = read_row)
      yield(row)
    end
  end

  private

  # Reads more content into the buffer. A nil or empty read marks end-of-file.
  def read_buffer
    chunk = @io.read(BUFFER_SIZE)
    if chunk.nil? || chunk.empty?
      @eof = true
    else
      @buffer << chunk
    end
  end

  # Returns the next non-empty row, or false when the input is exhausted.
  # Blank lines are skipped instead of ending the iteration.
  def read_row
    loop do
      @row = []
      while !@eof || !@buffer.empty?
        break unless read_next_col
      end

      row = @row
      @row = nil

      puts "csv_lazy: Row: #{row}\n\n" if @debug

      return row unless row.empty?
      # An empty row with input left means a blank line was consumed: go on.
      return false if @eof && @buffer.empty?
    end
  end

  # Runs a regex against the buffer. If matched it also removes it from the
  # buffer and returns the MatchData, otherwise returns false.
  def read_remove_regex(regex)
    match = @buffer.match(regex)
    return false unless match

    oldbuffer = @buffer
    # Every regex used here is anchored at \A, so dropping the match is the
    # same as keeping whatever follows it.
    @buffer = match.post_match

    if @debug
      print "csv_lazy: Regex: #{regex.to_s}\n"
      print "csv_lazy: Match: #{match.to_a}\n"
      print "csv_lazy: Buffer before: #{oldbuffer}\n"
      print "csv_lazy: Buffer after: #{@buffer}\n"
      print "\n"
    end

    # Guards against a zero-width match, which would never make progress.
    raise "Buffer was the same before regex?" if match[0].empty?
    match
  end

  # Adds the next column to the row. Returns true if more columns should be
  # read or false if this was the end of the row.
  def read_next_col
    loop do
      read_buffer if @buffer.length < BUFFER_SIZE
      return false if @buffer.empty?
      return false if read_remove_regex(@regex_row_end)
      return read_quoted_col if read_remove_regex(@regex_begin_quote_char)

      if (match = read_remove_regex(@regex_read_until_col_sep))
        @row << match[1]
        return true
      elsif (match = read_remove_regex(@regex_read_until_row_sep))
        puts "csv_lazy: Row seperator reached." if @debug
        @row << match[1]
        return false
      elsif @buffer =~ @regex_read_until_end
        if @eof
          # Last column of the input without a trailing separator.
          @row << read_remove_regex(@regex_read_until_end)[1]
          return false
        end

        # No separator in the buffer yet: leave the buffer untouched, fetch
        # more data and try again.
        read_buffer
      else
        raise "Dont know what to do with buffer: #{@buffer}"
      end
    end
  end

  # Reads the rest of a quoted column (the opening quote has already been
  # consumed) and whatever terminates it. Returns true if more columns follow
  # in this row, false at the end of the row.
  def read_quoted_col
    until (match = read_remove_regex(@regex_read_until_quote_char))
      # Without this check a missing closing quote would loop forever.
      raise "Unterminated quoted column at end of input." if @eof
      read_buffer
    end
    @row << match[1]

    read_buffer if @buffer.length < BUFFER_SIZE

    if read_remove_regex(@regex_colsep_next)
      true
    elsif @eof && @buffer.empty?
      puts "csv_lazy: End-of-file and empty buffer." if @debug
      false
    elsif read_remove_regex(@regex_row_end)
      puts "csv_lazy: Row-end found." if @debug
      false
    else
      raise "Dont know what to do (#{@buffer.length}): #{@buffer}"
    end
  end
end

# Kept so that existing references to the constant keep working; the parser
# itself no longer raises it.
class Csv_lazy::Retry < RuntimeError

end
@@ -0,0 +1,67 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
# The examples below use StringIO and Zlib directly, so load them explicitly
# instead of relying on another library having required them.
require 'stringio'
require 'zlib'

describe "CsvLazy" do
  # Parses +cont+ with the default options and checks that it consists of
  # exactly two rows of five purely numeric columns each.
  def expect_two_numeric_rows(cont)
    count = 0
    Csv_lazy.new(:io => StringIO.new(cont)) do |csv|
      raise "Expected count of 5 but it wasnt: #{csv.length} (#{csv})" if csv.length != 5

      csv.each do |csv_ele|
        raise "Expected numeric value but it wasnt: '#{csv_ele}'." if !csv_ele.to_s.match(/^(\d+)$/)
      end

      count += 1
    end

    raise "Expected 2 rows but got #{count}" if count != 2
  end

  it "should be able to read CSV" do
    expect_two_numeric_rows("1;2;3;4;5\n6;7;8;9;10")
  end

  it "should be able to read mixed CSV" do
    expect_two_numeric_rows("1;\"2\";3;\"4\";5\n6;7;8;9;10")
  end

  it "should be able to handle ending whitespaces" do
    expect_two_numeric_rows("1;2;3;4;\"5\" \n6;7;8;9;\"10\"")
  end

  it "should read sample 1" do
    count = 0
    Zlib::GzipReader.open("#{File.dirname(__FILE__)}/test1.csv.gz") do |gz|
      Csv_lazy.new(:io => gz, :col_sep => ",", :row_sep => "\r\n") do |row|
        raise "Expected length of 32 but it wasnt: #{row.length}" if row.length != 32
        raise "Expected C-format or 'contract_id' column as the first but it wasnt: #{row[0]}" if !row[0].to_s.match(/^C(\d+)$/) and row[0] != "contract_id"
        count += 1
      end
    end

    raise "Expected 23 rows but got #{count}" if count != 23
  end
end
@@ -0,0 +1,12 @@
1
# Shared setup for all specs: puts the gem's lib directory and the spec
# directory on the load path, then loads rspec and the library under test.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
require 'rspec'
require 'csv_lazy'

# Load every Ruby file below ./support/ (custom matchers, macros, etc.).
support_files = Dir["#{spec_dir}/support/**/*.rb"]
support_files.each do |support_file|
  require support_file
end

RSpec.configure do |config|

end
data/spec/test1.csv.gz ADDED
Binary file
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv_lazy
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.0
6
+ platform: ruby
7
+ authors:
8
+ - Kasper Johansen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-08-09 00:00:00 +02:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: rspec
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ~>
22
+ - !ruby/object:Gem::Version
23
+ version: 2.8.0
24
+ type: :development
25
+ prerelease: false
26
+ version_requirements: *id001
27
+ - !ruby/object:Gem::Dependency
28
+ name: rdoc
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ~>
33
+ - !ruby/object:Gem::Version
34
+ version: "3.12"
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: *id002
38
+ - !ruby/object:Gem::Dependency
39
+ name: bundler
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: 1.0.0
46
+ type: :development
47
+ prerelease: false
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: jeweler
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ version: 1.8.4
57
+ type: :development
58
+ prerelease: false
59
+ version_requirements: *id004
60
+ description: A small CSV lib that skips whitespace-format-bugs and more.
61
+ email: k@spernj.org
62
+ executables: []
63
+
64
+ extensions: []
65
+
66
+ extra_rdoc_files:
67
+ - LICENSE.txt
68
+ - README.rdoc
69
+ files:
70
+ - .document
71
+ - .rspec
72
+ - Gemfile
73
+ - Gemfile.lock
74
+ - LICENSE.txt
75
+ - README.rdoc
76
+ - Rakefile
77
+ - VERSION
78
+ - csv_lazy.gemspec
79
+ - lib/csv_lazy.rb
80
+ - spec/csv_lazy_spec.rb
81
+ - spec/spec_helper.rb
82
+ - spec/test1.csv.gz
83
+ has_rdoc: true
84
+ homepage: http://github.com/kaspernj/csv_lazy
85
+ licenses:
86
+ - MIT
87
+ post_install_message:
88
+ rdoc_options: []
89
+
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ hash: 4190482346075524299
98
+ segments:
99
+ - 0
100
+ version: "0"
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: "0"
107
+ requirements: []
108
+
109
+ rubyforge_project:
110
+ rubygems_version: 1.6.2
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: A small CSV lib that skips whitespace-format-bugs and more.
114
+ test_files: []
115
+