csvpp 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,162 @@
1
module CSVPP
  # Turns delimiter-separated text into one hash per record.
  #
  # The layout of the input is described by a format object, which this class
  # queries for variable names, column indexes, types, the number of leading
  # lines to skip and (for multiline formats) which lines start a new group.
  class Parser
    include Conversions

    attr_reader :format, :col_sep

    # Parse the file at the given path.
    #
    # @param input [String] path to input file
    # @param format [Format]
    # @param col_sep [String]
    # @param convert_type [Boolean] when truthy, values are converted to the
    #   type the format declares for their variable
    # @yieldparam hash [Hash] the parsed record; a truthy block result is
    #   collected in place of the hash
    #
    # @return [Array<Object>]
    def self.parse(input:,
                   format:,
                   col_sep: DEFAULT_COL_SEP,
                   convert_type: true,
                   &block)
      new(
        format: format,
        col_sep: col_sep,
        convert_type: convert_type,
      ).parse(input, &block)
    end

    # Parse the given string.
    #
    # @param input [String] input string
    # @param format [Format]
    # @param col_sep [String]
    # @param convert_type [Boolean] when truthy, values are converted to the
    #   type the format declares for their variable
    # @yieldparam hash [Hash] the parsed record; a truthy block result is
    #   collected in place of the hash
    #
    # @return [Array<Object>]
    def self.parse_str(input:,
                       format:,
                       col_sep: DEFAULT_COL_SEP,
                       convert_type: true,
                       &block)
      new(
        format: format,
        col_sep: col_sep,
        convert_type: convert_type,
      ).parse_str(input, &block)
    end

    # @param format [Format]
    # @param col_sep [String]
    # @param convert_type [Boolean]
    def initialize(format:, col_sep: DEFAULT_COL_SEP, convert_type: true)
      @format = format
      @col_sep = col_sep
      @convert_type = convert_type
    end

    # @return [Boolean] whether parsed values get converted to their type
    def convert_type?
      !!@convert_type
    end

    # Parse the file at +path+.
    #
    # The file is opened with the block form of File.open so the handle is
    # closed once parsing finishes, including when parsing raises.
    #
    # @param path [String] path to input file
    # @return [Array<Object>]
    def parse(path, &block)
      File.open(path) { |io| parse_io(io, &block) }
    end

    # Parse the given string.
    #
    # @param str [String] input string
    # @return [Array<Object>]
    def parse_str(str, &block)
      parse_io(str, &block)
    end

    # @return [Boolean] whatever the format reports for +multiline?+
    def multiline?
      format.multiline?
    end

    private

    # Store +value+ under +var+, replacing it with the converted value when
    # conversion is enabled and the format declares a type for +var+.
    # NOTE(review): +convert+ is presumably provided by Conversions.
    def set_value!(hash, var, value)
      hash[var] = value
      return unless convert_type?

      type = format.type(var)
      return if type.nil?

      hash[var] = convert(value,
                          to: type,
                          missings: format.missings(var),
                          true_values: format.true_values(var),
                          false_values: format.false_values(var))
    end

    # Append a record to +results+. When a block is given and returns a truthy
    # value, that value is appended; otherwise the hash itself is appended.
    def add_result!(results, hash, &block)
      mapped = block && block.call(hash)
      results << (mapped || hash)
    end

    # Parse anything that responds to +each_line+ (an IO or a String).
    # Multiline formats are delegated to #parse_multiline; otherwise every
    # non-skipped line becomes one record.
    def parse_io(io, &block)
      return parse_multiline(io, &block) if multiline?

      results = []

      each_line_with_index(io) do |line, index|
        columns = line.split(col_sep, -1)

        # Set once per line (not once per variable) so that the key is
        # present even when the format declares no variables.
        hash = { "line_number" => index + 1 }

        format.var_names.each do |var|
          set_value!(hash, var, column_value(columns, var))
        end

        add_result!(results, hash, &block)
      end

      results
    end

    # Parse a multiline format: a record is built from a group of consecutive
    # lines, the first column of each line being its line id. Lines seen
    # before the first group start are ignored. A group's "line_number" is
    # the number of the line that started it.
    def parse_multiline(io, &block)
      results = []
      hash = nil

      each_line_with_index(io) do |line, index|
        columns = line.split(col_sep, -1)
        line_id = columns[0]

        # If we reach a start of a group...
        if multiline_start?(line_id)
          # ...yield the previous group...
          add_result!(results, hash, &block) if hash

          # ...and start building a new one.
          hash = { "line_number" => index + 1 }
        end

        next if hash.nil?

        format.vars_for_line(line_id).each do |var|
          set_value!(hash, var, column_value(columns, var))
        end
      end

      # Yield the last group.
      add_result!(results, hash, &block) if hash

      results
    end

    # Raw, whitespace-stripped text of the column the format assigns to +var+.
    # No fallback is applied: if the line has no column at that index the
    # lookup yields nil and +strip+ raises NoMethodError.
    def column_value(columns, var)
      columns[format.index(var)].strip
    end

    def multiline_start?(line_id)
      format.multiline_start?(line_id)
    end

    # Yield each line and corresponding index of io to given block, but skipping
    # the first lines according to the skip parameter defined in format.
    # The index counts skipped lines too, so it is the line's position in the
    # whole input.
    def each_line_with_index(io)
      offset = format.skip
      io.each_line.with_index do |line, index|
        next if index < offset

        yield(line, index)
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
module CSVPP
  # Version of the csvpp gem. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = "0.3.0".freeze
end
data/lib/csvpp.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'oj'
2
+
3
+ require_relative './csvpp/version'
4
+ require_relative './csvpp/conversions'
5
+ require_relative './csvpp/format'
6
+ require_relative './csvpp/parser'
7
+
8
module CSVPP

  # Column separator used when the caller does not pass one.
  DEFAULT_COL_SEP = '|'.freeze

  # Parse an input file using a format loaded from a format file.
  #
  # @param input [String] path to input file
  # @param format [String] path to format file
  # @param col_sep [String]
  # @param convert_type [Boolean] forwarded to Parser.parse; the default of
  #   true matches Parser's own default
  #
  # @return [Array<Object>]
  def self.parse(input:, format:, col_sep: DEFAULT_COL_SEP, convert_type: true, &block)
    Parser.parse(
      input: input,
      format: Format.load(format),
      col_sep: col_sep,
      convert_type: convert_type,
      &block
    )
  end

  # Parse an input string using a format given as a string.
  #
  # @param input [String] input string
  # @param format [String] format string
  # @param col_sep [String]
  # @param convert_type [Boolean] forwarded to Parser.parse_str; the default
  #   of true matches Parser's own default
  #
  # @return [Array<Object>]
  def self.parse_str(input:, format:, col_sep: DEFAULT_COL_SEP, convert_type: true, &block)
    Parser.parse_str(
      input: input,
      format: Format.load_from_str(format),
      col_sep: col_sep,
      convert_type: convert_type,
      &block
    )
  end

  # Parse an input string and dump the result with Oj, wrapped in a hash
  # under the 'vars' key.
  #
  # @param input [String] input string
  # @param format [String] format string
  # @param col_sep [String]
  # @param convert_type [Boolean] forwarded to parse_str
  #
  # @return [String]
  def self.json(input:, format:, col_sep: DEFAULT_COL_SEP, convert_type: true)
    h = {
      'vars' => parse_str(
        input: input,
        format: format,
        col_sep: col_sep,
        convert_type: convert_type
      )
    }
    Oj.dump(h)
  end
end
data/ui/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ # See https://help.github.com/ignore-files/ for more about ignoring files.
2
+
3
+ # dependencies
4
+ /node_modules
5
+
6
+ # testing
7
+ /coverage
8
+
9
+ # production
10
+ /build
11
+
12
+ # misc
13
+ .DS_Store
14
+ .env.local
15
+ .env.development.local
16
+ .env.test.local
17
+ .env.production.local
18
+
19
+ npm-debug.log*
20
+ yarn-debug.log*
21
+ yarn-error.log*