csvpp 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,162 @@
module CSVPP
  # Turns delimiter-separated text into an array of result objects, one per
  # input line (or one per group of lines when the format is multiline).
  #
  # Which variables exist, which column each one lives in, and how each raw
  # string is converted are all looked up on the given format object.
  class Parser
    include Conversions

    attr_reader :format, :col_sep

    # Parse the file found at +input+.
    #
    # @param input [String] path to input file
    # @param format [Format]
    # @param col_sep [String] column separator
    # @param convert_type [Boolean] when truthy, values are converted to the
    #   type the format declares for their variable
    # @yield [hash] each parsed Hash; a truthy block result is collected in
    #   place of the hash
    #
    # @return [Array<Object>]
    def self.parse(input:,
                   format:,
                   col_sep: DEFAULT_COL_SEP,
                   convert_type: true,
                   &block)
      new(
        format: format,
        col_sep: col_sep,
        convert_type: convert_type
      ).parse(input, &block)
    end

    # Parse the text passed directly as +input+.
    #
    # @param input [String] input string
    # @param format [Format]
    # @param col_sep [String] column separator
    # @param convert_type [Boolean] when truthy, values are converted to the
    #   type the format declares for their variable
    # @yield [hash] each parsed Hash; a truthy block result is collected in
    #   place of the hash
    #
    # @return [Array<Object>]
    def self.parse_str(input:,
                       format:,
                       col_sep: DEFAULT_COL_SEP,
                       convert_type: true,
                       &block)
      new(
        format: format,
        col_sep: col_sep,
        convert_type: convert_type
      ).parse_str(input, &block)
    end

    # @param format [Format]
    # @param col_sep [String] column separator
    # @param convert_type [Boolean] whether values get type-converted
    def initialize(format:, col_sep: DEFAULT_COL_SEP, convert_type: true)
      @format = format
      @col_sep = col_sep
      @convert_type = convert_type
    end

    # @return [Boolean] whether type conversion is enabled
    def convert_type?
      !!@convert_type
    end

    # Parse the file at +path+. The file is opened with the block form of
    # File.open so the handle is closed even if parsing raises.
    #
    # @param path [String] path to input file
    # @return [Array<Object>]
    def parse(path, &block)
      File.open(path) { |io| parse_io(io, &block) }
    end

    # Parse +str+ directly; it only needs to respond to #each_line.
    #
    # @param str [String] input string
    # @return [Array<Object>]
    def parse_str(str, &block)
      parse_io(str, &block)
    end

    # @return [Boolean] result of asking the format whether it is multiline
    def multiline?
      format.multiline?
    end

    private

    # Store +value+ under +var+, replacing it with the converted value when
    # conversion is enabled and the format declares a type for +var+.
    def set_value!(hash, var, value)
      hash[var] = value
      return unless convert_type?

      type = format.type(var)
      return if type.nil?

      hash[var] = convert(value,
                          to: type,
                          missings: format.missings(var),
                          true_values: format.true_values(var),
                          false_values: format.false_values(var))
    end

    # Append to +results+ either the block's truthy return value or, when
    # there is no block or it returns nil/false, the hash itself.
    def add_result!(results, hash, &block)
      results << ((block && block.call(hash)) || hash)
    end

    # Copy the column belonging to each of +vars+ from +columns+ into +hash+.
    #
    # NOTE(review): a line with fewer columns than the format expects yields
    # nil from columns[...] and raises NoMethodError on #strip; behavior is
    # kept as in the original, confirm whether short lines should be tolerated.
    def assign_columns!(hash, vars, columns)
      vars.each do |var|
        raw = columns[format.index(var)].strip
        set_value!(hash, var, raw)
      end
    end

    # Parse +io+ one record per line, delegating to #parse_multiline when the
    # format is multiline.
    def parse_io(io, &block)
      return parse_multiline(io, &block) if multiline?

      results = []

      each_line_with_index(io) do |line, index|
        # Negative limit keeps trailing empty columns.
        columns = line.split(col_sep, -1)

        # Line numbers are 1-based; set once per record, before any variable.
        hash = { "line_number" => index + 1 }
        assign_columns!(hash, format.var_names, columns)

        add_result!(results, hash, &block)
      end

      results
    end

    # Parse +io+ where one record spans several lines. The first column of
    # every line is its line id; the format decides which ids start a new
    # group and which variables each id carries.
    def parse_multiline(io, &block)
      results = []
      hash = nil

      each_line_with_index(io) do |line, index|
        columns = line.split(col_sep, -1)
        line_id = columns[0]

        # If we reach a start of a group...
        if multiline_start?(line_id)
          # ...yield the previous group...
          add_result!(results, hash, &block) if hash

          # ...and start building a new one, tagged with its first line.
          hash = { "line_number" => index + 1 }
        end

        # Lines before the first group start belong to no group; skip them.
        next if hash.nil?

        assign_columns!(hash, format.vars_for_line(line_id), columns)
      end

      # Yield the last group.
      add_result!(results, hash, &block) if hash

      results
    end

    def multiline_start?(line_id)
      format.multiline_start?(line_id)
    end

    # Yield each line and corresponding index of io to given block, but skipping
    # the first lines according to the skip parameter defined in format.
    def each_line_with_index(io)
      offset = format.skip
      io.each_line.with_index do |line, index|
        next if index < offset

        yield(line, index)
      end
    end
  end
end
@@ -0,0 +1,3 @@
module CSVPP
  # Version string of the gem; frozen so it cannot be mutated at runtime.
  VERSION = "0.3.0".freeze
end
data/lib/csvpp.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'oj'
2
+
3
+ require_relative './csvpp/version'
4
+ require_relative './csvpp/conversions'
5
+ require_relative './csvpp/format'
6
+ require_relative './csvpp/parser'
7
+
# Top-level convenience API: loads the format, then hands off to Parser.
module CSVPP

  # Column separator used when the caller does not supply one.
  DEFAULT_COL_SEP = '|'.freeze

  # Parse an input file using a format file.
  #
  # @param input [String] path to input file
  # @param format [String] path to format file
  # @param col_sep [String]
  # @param convert_type [Boolean] forwarded to Parser.parse
  #
  # @return [Array<Object>]
  def self.parse(input:, format:, col_sep: DEFAULT_COL_SEP, convert_type: true, &block)
    Parser.parse(
      input: input,
      format: Format.load(format),
      col_sep: col_sep,
      convert_type: convert_type,
      &block
    )
  end

  # Parse an input string using a format given as a string.
  #
  # @param input [String] input string
  # @param format [String] format string
  # @param col_sep [String]
  # @param convert_type [Boolean] forwarded to Parser.parse_str
  #
  # @return [Array<Object>]
  def self.parse_str(input:, format:, col_sep: DEFAULT_COL_SEP, convert_type: true, &block)
    Parser.parse_str(
      input: input,
      format: Format.load_from_str(format),
      col_sep: col_sep,
      convert_type: convert_type,
      &block
    )
  end

  # Parse an input string and dump the result with Oj, wrapped under the
  # 'vars' key.
  #
  # @param input [String] input string
  # @param format [String] format string
  # @param col_sep [String]
  # @param convert_type [Boolean] forwarded to parse_str
  #
  # @return [String]
  def self.json(input:, format:, col_sep: DEFAULT_COL_SEP, convert_type: true)
    vars = parse_str(
      input: input,
      format: format,
      col_sep: col_sep,
      convert_type: convert_type
    )
    h = { 'vars' => vars }
    Oj.dump(h)
  end
end
data/ui/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ # See https://help.github.com/ignore-files/ for more about ignoring files.
2
+
3
+ # dependencies
4
+ /node_modules
5
+
6
+ # testing
7
+ /coverage
8
+
9
+ # production
10
+ /build
11
+
12
+ # misc
13
+ .DS_Store
14
+ .env.local
15
+ .env.development.local
16
+ .env.test.local
17
+ .env.production.local
18
+
19
+ npm-debug.log*
20
+ yarn-debug.log*
21
+ yarn-error.log*