csvreader 1.2.1 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,21 @@
1
- # encoding: utf-8
2
1
 
3
2
 
4
3
  require 'pp'
5
- require 'logger'
4
+ require 'logger' ## todo/fix: check why logger is required - use logutils!!!???
6
5
  require 'forwardable'
7
6
  require 'stringio'
7
+ require 'fileutils'
8
+
9
+ require 'time'
8
10
  require 'date' ## use for Date.parse and DateTime.parse
9
11
  require 'yaml' ## used for (optional) meta data blocks
12
+ require 'json'
13
+
10
14
 
15
+ ## our own parser libs
16
+ require 'tabreader'
17
+ require 'csvjson'
18
+ require 'csvyaml'
11
19
 
12
20
 
13
21
  ###
@@ -19,6 +27,7 @@ require 'csvreader/parser_strict' # flexible (strict - no leading/trailing spa
19
27
  require 'csvreader/parser_tab'
20
28
  require 'csvreader/parser_fixed'
21
29
  require 'csvreader/parser_json'
30
+ require 'csvreader/parser_yaml'
22
31
  require 'csvreader/parser_table'
23
32
  require 'csvreader/parser'
24
33
  require 'csvreader/converter'
@@ -68,6 +77,8 @@ class Parser
68
77
  TABLE = ParserTable.new ## space-separated e.g /[ \t]+/
69
78
  FIXED = ParserFixed.new
70
79
 
80
+ JSON = ParserJson.new
81
+ YAML = ParserYaml.new
71
82
 
72
83
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
73
84
  def self.numeric() NUMERIC; end
@@ -85,11 +96,17 @@ class Parser
85
96
  def self.postgres() postgresql; end
86
97
  def self.postgresql_text() POSTGRESQL_TEXT; end
87
98
  def self.postgres_text() postgresql_text; end
99
+
88
100
  def self.tab() TAB; end
89
101
  def self.table() TABLE; end
90
102
  def self.fixed() FIXED; end
91
103
  def self.fix() fixed; end
92
104
  def self.f() fixed; end
105
+
106
+ def self.json() JSON; end
107
+ def self.j() json; end
108
+ def self.yaml() YAML; end
109
+ def self.y() yaml; end
93
110
  end # class Parser
94
111
  end # class CsvReader
95
112
 
@@ -114,6 +131,8 @@ class CsvReader
114
131
  TABLE = Builder.new( Parser::TABLE )
115
132
  FIXED = Builder.new( Parser::FIXED )
116
133
 
134
+ JSON = Builder.new( Parser::JSON )
135
+ YAML = Builder.new( Parser::YAML )
117
136
 
118
137
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
119
138
  def self.numeric() NUMERIC; end
@@ -131,11 +150,17 @@ class CsvReader
131
150
  def self.postgres() postgresql; end
132
151
  def self.postgresql_text() POSTGRESQL_TEXT; end
133
152
  def self.postgres_text() postgresql_text; end
153
+
134
154
  def self.tab() TAB; end
135
155
  def self.table() TABLE; end
136
156
  def self.fixed() FIXED; end
137
157
  def self.fix() fixed; end
138
158
  def self.f() fixed; end
159
+
160
+ def self.json() JSON; end
161
+ def self.j() json; end
162
+ def self.yaml() YAML; end
163
+ def self.y() yaml; end
139
164
  end # class CsvReader
140
165
 
141
166
 
@@ -158,6 +183,9 @@ class CsvHashReader
158
183
  TABLE = Builder.new( Parser::TABLE )
159
184
  FIXED = Builder.new( Parser::FIXED )
160
185
 
186
+ JSON = Builder.new( Parser::JSON )
187
+ YAML = Builder.new( Parser::YAML )
188
+
161
189
 
162
190
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
163
191
  def self.numeric() NUMERIC; end
@@ -171,11 +199,17 @@ class CsvHashReader
171
199
  def self.postgres() postgresql; end
172
200
  def self.postgresql_text() POSTGRESQL_TEXT; end
173
201
  def self.postgres_text() postgresql_text; end
202
+
174
203
  def self.tab() TAB; end
175
204
  def self.table() TABLE; end
176
205
  def self.fixed() FIXED; end
177
206
  def self.fix() fixed; end
178
207
  def self.f() fixed; end
208
+
209
+ def self.json() JSON; end
210
+ def self.j() json; end
211
+ def self.yaml() YAML; end
212
+ def self.y() yaml; end
179
213
  end # class CsvHashReader
180
214
 
181
215
 
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvReader
4
3
  class Buffer ## todo: find a better name:
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvReader
4
3
  class Builder
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  class CsvReader
4
3
 
@@ -1,33 +1,32 @@
1
- # encoding: utf-8
2
-
3
- class CsvReader
4
-
5
- class Parser
6
- ## "forward" reference,
7
- ## see base.rb for more
8
- end
9
-
10
-
11
- ####################################
12
- # define errors / exceptions
13
- # for all parsers for (re)use
14
-
15
- class Error < StandardError
16
- end
17
-
18
- ####
19
- # todo/check:
20
- # use "common" error class - why? why not?
21
-
22
- class ParseError < Error
23
- attr_reader :message
24
-
25
- def initialize( message )
26
- @message = message
27
- end
28
-
29
- def to_s
30
- "*** csv parse error: #{@message}"
31
- end
32
- end # class ParseError
33
- end # class CsvReader
1
+
2
+ class CsvReader
3
+
4
+ class Parser
5
+ ## "forward" reference,
6
+ ## see base.rb for more
7
+ end
8
+
9
+
10
+ ####################################
11
+ # define errors / exceptions
12
+ # for all parsers for (re)use
13
+
14
+ class Error < StandardError
15
+ end
16
+
17
+ ####
18
+ # todo/check:
19
+ # use "common" error class - why? why not?
20
+
21
+ class ParseError < Error
22
+ attr_reader :message
23
+
24
+ def initialize( message )
25
+ @message = message
26
+ end
27
+
28
+ def to_s
29
+ "*** csv parse error: #{@message}"
30
+ end
31
+ end # class ParseError
32
+ end # class CsvReader
@@ -1,106 +1,105 @@
1
- # encoding: utf-8
2
-
3
- class CsvReader
4
-
5
- class ParserFixed
6
-
7
- ###################################
8
- ## add simple logger with debug flag/switch
9
- #
10
- # use Parser.debug = true # to turn on
11
- #
12
- # todo/fix: use logutils instead of std logger - why? why not?
13
-
14
- def self.build_logger()
15
- l = Logger.new( STDOUT )
16
- l.level = :info ## set to :info on start; note: is 0 (debug) by default
17
- l
18
- end
19
- def self.logger() @@logger ||= build_logger; end
20
- def logger() self.class.logger; end
21
-
22
-
23
- def parse( data, width:, &block )
24
-
25
- ## note: input: required each_line (string or io/file for example)
26
-
27
- input = data ## assume it's a string or io/file handle
28
-
29
- if block_given?
30
- parse_lines( input, width: width, &block )
31
- else
32
- records = []
33
-
34
- parse_lines( input, width: width ) do |record|
35
- records << record
36
- end
37
-
38
- records
39
- end
40
- end ## method parse
41
-
42
-
43
-
44
- private
45
-
46
- def parse_lines( input, width:, &block )
47
-
48
- ## note: each line only works with \n (windows) or \r\n (unix)
49
- ## will NOT work with \r (old mac, any others?) only!!!!
50
- input.each_line do |line|
51
-
52
- ## note: chomp('') if is an empty string,
53
- ## it will remove all trailing newlines from the string.
54
- ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
55
- line = line.chomp( '' )
56
- logger.debug "line:" if logger.debug?
57
- logger.debug line.pretty_inspect if logger.debug?
58
-
59
-
60
- ## skip empty lines and comments
61
- if line =~ /^[ \t]*$/ ## skip blank lines (with whitespace only)
62
- logger.debug "skip blank line" if logger.debug?
63
- next
64
- end
65
-
66
- if line =~ /^[ \t]*#/ # start_with?( "#" ) -- skip comment lines (note: allow leading whitespaces)
67
- logger.debug "skip comment line" if logger.debug?
68
- next
69
- end
70
-
71
-
72
- if width.is_a?( String )
73
- ## assume it's String#unpack format e.g.
74
- ## "209231-231992395 MoreData".unpack('aa5A1A9a4Z*')
75
- ## returns an array as follows :
76
- ## ["2", "09231", "-", "231992395", " ", "MoreData"]
77
- ## see String#unpack
78
-
79
- values = line.unpack( width )
80
- else ## assume array with integers
81
- values = []
82
- offset = 0 # start position / offset
83
- width.each_with_index do |w,i|
84
- logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
85
-
86
- if w < 0 ## convention - if width negative, skip column
87
- # note: minus (-) and minus (-) equal plus (+)
88
- ## e.g. 2 - -2 = 4
89
- offset -= w
90
- else
91
- value = line[offset, w]
92
- value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
93
- values << value
94
- offset += w
95
- end
96
- end
97
- end
98
-
99
- ## note: requires block - enforce? how? why? why not?
100
- block.call( values )
101
- end
102
- end # method parse_lines
103
-
104
-
105
- end # class ParserFixed
106
- end # class CsvReader
1
+
2
+ class CsvReader
3
+
4
+ class ParserFixed
5
+
6
+ ###################################
7
+ ## add simple logger with debug flag/switch
8
+ #
9
+ # use Parser.debug = true # to turn on
10
+ #
11
+ # todo/fix: use logutils instead of std logger - why? why not?
12
+
13
+ def self.build_logger()
14
+ l = Logger.new( STDOUT )
15
+ l.level = :info ## set to :info on start; note: is 0 (debug) by default
16
+ l
17
+ end
18
+ def self.logger() @@logger ||= build_logger; end
19
+ def logger() self.class.logger; end
20
+
21
+
22
+ def parse( data, width:, &block )
23
+
24
+ ## note: input must respond to each_line (a string or io/file, for example)
25
+
26
+ input = data ## assume it's a string or io/file handle
27
+
28
+ if block_given?
29
+ parse_lines( input, width: width, &block )
30
+ else
31
+ records = []
32
+
33
+ parse_lines( input, width: width ) do |record|
34
+ records << record
35
+ end
36
+
37
+ records
38
+ end
39
+ end ## method parse
40
+
41
+
42
+
43
+ private
44
+
45
+ def parse_lines( input, width:, &block )
46
+
47
+ ## note: each_line only works with \n (unix) or \r\n (windows) line endings;
48
+ ## it will NOT work with \r-only line endings (old mac, any others?)!!!!
49
+ input.each_line do |line|
50
+
51
+ ## note: chomp('') - when called with an empty string -
52
+ ## removes all trailing newlines (\n and \r\n) from the string.
53
+ ## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
54
+ line = line.chomp( '' )
55
+ logger.debug "line:" if logger.debug?
56
+ logger.debug line.pretty_inspect if logger.debug?
57
+
58
+
59
+ ## skip empty lines and comments
60
+ if line =~ /^[ \t]*$/ ## skip blank lines (with whitespace only)
61
+ logger.debug "skip blank line" if logger.debug?
62
+ next
63
+ end
64
+
65
+ if line =~ /^[ \t]*#/ # start_with?( "#" ) -- skip comment lines (note: allow leading whitespaces)
66
+ logger.debug "skip comment line" if logger.debug?
67
+ next
68
+ end
69
+
70
+
71
+ if width.is_a?( String )
72
+ ## assume it's String#unpack format e.g.
73
+ ## "209231-231992395 MoreData".unpack('aa5A1A9a4Z*')
74
+ ## returns an array as follows :
75
+ ## ["2", "09231", "-", "231992395", " ", "MoreData"]
76
+ ## see String#unpack
77
+
78
+ values = line.unpack( width )
79
+ else ## assume array with integers
80
+ values = []
81
+ offset = 0 # start position / offset
82
+ width.each_with_index do |w,i|
83
+ logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
84
+
85
+ if w < 0 ## convention - if width negative, skip column
86
+ # note: minus (-) and minus (-) equal plus (+)
87
+ ## e.g. 2 - -2 = 4
88
+ offset -= w
89
+ else
90
+ value = line[offset, w]
91
+ value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
92
+ values << value
93
+ offset += w
94
+ end
95
+ end
96
+ end
97
+
98
+ ## note: requires block - enforce? how? why? why not?
99
+ block.call( values )
100
+ end
101
+ end # method parse_lines
102
+
103
+
104
+ end # class ParserFixed
105
+ end # class CsvReader
@@ -1,5 +1,23 @@
1
- # encoding: utf-8
2
-
3
-
4
- #######
5
- ## todo: add parser for new CSV <3 JSON format
1
+
2
+
3
+ class CsvReader
4
+
5
+ class ParserJson
6
+
7
+ def parse( data, **kwargs, &block )
8
+ ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
9
+
10
+ ## note: input: required each_line (string or io/file for example)
11
+ ## assume data is a string or io/file handle
12
+ csv = CsvJson.new( data )
13
+
14
+ if block_given?
15
+ csv.each( &block )
16
+ else
17
+ csv.to_a
18
+ end
19
+ end ## method parse
20
+
21
+
22
+ end # class ParserJson
23
+ end # class CsvReader