libhxl-ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 719cfb05f414be3c3f796a5549d30d68ed4e9a03
4
+ data.tar.gz: 75f03298ddd344a4cca609a3e3db1897da1ad35e
5
+ SHA512:
6
+ metadata.gz: 29f19279e9d8f3d60b2d1d3e536193db92d70ab8936b7b866d9cfd62853548830031dfbb37e95fdd333e5c18c4402afc5097b123e629002b13a4be4e4358820e
7
+ data.tar.gz: d9db4b7a208fa9bf3c26d650ea9fd250d6f1b4197682648f427069cd8f13921a9dbf9a238b762f776cd33d68fc2a5862110789e3c099e92e21f8b17e88a3c17e
@@ -0,0 +1,193 @@
1
# Streaming reader for HXL-tagged CSV files.
#
# The reader scans the CSV for the first row made up of HXL hashtags
# (e.g. "#country", "#sector + #affected_num"), then yields one HXLRow per
# logical data row. Columns tagged with two hashtags joined by "+" are
# "disaggregated": each such column produces its own logical row, so one
# source row can expand into several HXLRow objects.
class HXLReader

  # Pattern for a single tag with an optional two-letter language code,
  # e.g. "#country" or "#country/en".
  TAG_PATTERN = /(#[\w]+)(?:\/([[:alpha:]]{2}))?/

  # Pattern for a full tag spec (optional second tag following '+').
  # Anchored with \A and \z so that a multi-line cell such as
  # "Country\n#country" is NOT mistaken for a hashtag.
  FULL_TAG_PATTERN = /\A\s*#{TAG_PATTERN}(?:\s*\+\s*#{TAG_PATTERN})?\s*\z/

  private_constant :TAG_PATTERN, :FULL_TAG_PATTERN

  def initialize
  end

  # Iterate over the logical HXL rows of the CSV file at +path+.
  #
  # Rows before the hashtag row are skipped (the row immediately preceding
  # the hashtag row is remembered to supply fixed values for disaggregated
  # columns). Every following source row is yielded once, or once per
  # disaggregated column when the table has any.
  #
  # @param path [String] path handed to CSV.foreach
  # @yield [hxl_row] each logical row, as built by parse_row
  # @return [Enumerator, nil] an Enumerator when no block is given, otherwise nil
  # @raise [HXLFormatError] if no HXL hashtag row was found in the file
  def self.foreach(path, &block)
    return to_enum(:foreach, path) unless block_given?

    table_spec = nil
    prev_row = nil

    source_row_number = -1
    row_number = -1

    CSV.foreach(path) do |row|
      source_row_number += 1

      # If we don't have a table_spec yet (row of HXL tags), scan for one
      if table_spec.nil?
        table_spec = parse_hashtag_row(row, prev_row)

        # Remember non-hashtag rows: the one just above the hashtag row
        # provides the fixed values of disaggregated columns.
        prev_row = row if table_spec.nil?
        next
      end

      disaggregation_count = table_spec.get_disaggregation_count
      disaggregation_position = 0

      loop do
        # Next logical row
        row_number += 1
        previous_position = disaggregation_position

        hxl_row, disaggregation_position = parse_row(row,
                                                     table_spec,
                                                     disaggregation_position,
                                                     row_number,
                                                     source_row_number)
        yield hxl_row

        # A blank or short source row never reaches a disaggregated column,
        # so the position cannot advance; stop instead of looping forever.
        break if disaggregation_position == previous_position
        break unless disaggregation_position < disaggregation_count
      end
    end

    raise HXLFormatError, 'HXL hashtag row not found' if table_spec.nil?
  end

  # Build one logical HXL row from a raw CSV row.
  #
  # Untagged columns are dropped, as are cells beyond the width of the
  # hashtag row. The first disaggregated column encountered is replaced by
  # two values taken from the disaggregated column selected by
  # +disaggregation_position+: its fixed value and its cell value. Any
  # further disaggregated columns are skipped for this logical row.
  #
  # @param row [Array] raw CSV cells of the source row
  # @param table_spec [HXLTableSpec] column layout found by parse_hashtag_row
  # @param disaggregation_position [Integer] which disaggregated column to emit
  # @param row_number [Integer] logical row number
  # @param source_row_number [Integer] row number in the source CSV
  # @return [Array(HXLRow, Integer)] the row and the next disaggregation position
  def self.parse_row(row, table_spec, disaggregation_position, row_number, source_row_number)
    hxl_fields = []
    col_num = -1
    seen_fixed = false

    row.each_with_index do |value, source_col_number|
      col_spec = table_spec.col_specs[source_col_number]

      # Only parse HXL columns; cells past the hashtag row have no spec at all
      next if col_spec.nil? || col_spec.column.hxl_tag.nil?

      if col_spec.fixed_column
        # Looking at disaggregation: emit it only once per logical row
        next if seen_fixed

        raw_position = table_spec.get_raw_position(disaggregation_position)
        raw_spec = table_spec.col_specs[raw_position]

        col_num += 1
        hxl_fields.push HXLValue.new(raw_spec.fixed_column,
                                     raw_spec.fixed_value,
                                     col_num,
                                     source_col_number)

        col_num += 1
        hxl_fields.push HXLValue.new(raw_spec.column,
                                     row[raw_position],
                                     col_num,
                                     source_col_number)

        seen_fixed = true
        disaggregation_position += 1
      else
        # Regular column
        col_num += 1
        hxl_fields.push HXLValue.new(col_spec.column,
                                     value,
                                     col_num,
                                     source_col_number)
      end
    end

    hxl_row = HXLRow.new(table_spec.hxl_headers,
                         hxl_fields,
                         false,
                         row_number,
                         source_row_number)

    return hxl_row, disaggregation_position
  end

  # Try parsing the current raw CSV data row as a HXL hashtag row.
  #
  # Every non-blank cell must be a valid hashtag spec, and at least one cell
  # must be non-blank. Blank cells become untagged columns.
  #
  # @param row [Array] raw CSV cells of the candidate row
  # @param prev_row [Array, nil] the row above it, used as the source of
  #   fixed values for disaggregated columns; nil when there is none
  # @return [HXLTableSpec, nil] the table spec on success, nil on failure
  def self.parse_hashtag_row(row, prev_row)
    seen_header = false
    table_spec = HXLTableSpec.new

    row.each_with_index do |value, col_num|
      text = value.to_s.strip

      if text.empty?
        table_spec.push HXLColSpec.new(col_num, HXLColumn.new)
        next
      end

      col_spec = parse_hashtag(col_num, text)
      return nil if col_spec.nil?

      seen_header = true

      # prev_row is nil when the hashtag row is the first row of the file;
      # the fixed value is then left unset rather than crashing.
      col_spec.fixed_value = prev_row[col_num] if col_spec.fixed_column && prev_row

      table_spec.push col_spec
    end

    seen_header ? table_spec : nil
  end

  # Parse a single cell as a hashtag spec.
  #
  # For "#a + #b" the first tag becomes the fixed column and the second tag
  # the value column of the resulting spec.
  #
  # @param source_col_number [Integer] column index in the source CSV
  # @param value [String] cell text
  # @return [HXLColSpec, nil] the column spec, or nil if the text is not a hashtag spec
  def self.parse_hashtag(source_col_number, value)
    result = FULL_TAG_PATTERN.match(value)
    return nil unless result

    first = HXLColumn.new(result[1], result[2])
    return HXLColSpec.new(source_col_number, first) unless result[3]

    # There were two tags
    HXLColSpec.new(source_col_number, HXLColumn.new(result[3], result[4]), first)
  end

  # Unimplemented placeholder: currently only delegates to parse_source_row
  # and therefore returns nil. Both arguments are ignored.
  def self.parse_table_spec(row, prev_row)
    parse_source_row
  end

  # Unimplemented placeholder; returns nil.
  def self.parse_source_row
  end

end
187
+
188
+ require 'libhxl-ruby/hxl_format_error'
189
+ require 'libhxl-ruby/hxl_table_spec'
190
+ require 'libhxl-ruby/hxl_row'
191
+ require 'libhxl-ruby/hxl_value'
192
+ require 'libhxl-ruby/hxl_column'
193
+ require 'libhxl-ruby/hxl_col_spec'
@@ -0,0 +1,19 @@
1
# Defined in nested form so that this file can be loaded on its own: the
# compact "class HXLReader::HXLColSpec" form raises NameError unless
# HXLReader has already been defined.
class HXLReader

  # Column metadata for parsing a HXL CSV file
  #
  # This class captures the way a column is encoded in the input CSV
  # file, which might be different from the logical structure of the
  # HXL data. Used only during parsing.
  class HXLColSpec

    attr_reader :fixed_column, :source_col_number, :column
    attr_accessor :fixed_value

    # @param source_col_number [Integer] index of the column in the source CSV
    # @param column [HXLColumn, nil] column description for the cell values
    # @param fixed_column [HXLColumn, nil] column description for the fixed
    #   value; only set for disaggregated ("#a + #b") columns
    # @param fixed_value [Object, nil] the fixed value itself; writable
    #   because the reader fills it in after the spec has been created
    def initialize(source_col_number, column = nil, fixed_column = nil, fixed_value = nil)
      @source_col_number = source_col_number
      @column = column
      @fixed_column = fixed_column
      @fixed_value = fixed_value
    end

  end

end
@@ -0,0 +1,20 @@
1
# Defined in nested form so that this file can be loaded on its own: the
# compact "class HXLReader::HXLColumn" form raises NameError unless
# HXLReader has already been defined.
class HXLReader

  # A logical HXL column: a hashtag, an optional language code and a
  # human-readable header derived from the hashtag.
  class HXLColumn

    attr_reader :hxl_tag, :language_code, :header_text

    # @param hxl_tag [String, nil] the hashtag including the leading "#",
    #   or nil for an untagged column
    # @param language_code [String, nil] optional language code
    def initialize(hxl_tag = nil, language_code = nil)
      @hxl_tag = hxl_tag
      @language_code = language_code
      @header_text = pretty_tag
    end

    # Derive header text from the hashtag: drop the leading "#", drop a
    # trailing type suffix (_date, _deg, _id, _link, _num), turn the
    # remaining underscores into spaces and upcase the result.
    #
    # @return [String] e.g. "AFFECTED" for "#affected_num"; '' when there is no tag
    def pretty_tag
      return '' unless @hxl_tag

      # \A / \z anchor to the whole string rather than to individual lines
      @hxl_tag
        .sub(/\A#/, '')
        .sub(/_(?:date|deg|id|link|num)\z/, '')
        .tr('_', ' ')
        .upcase
    end

  end

end
@@ -0,0 +1,2 @@
1
# Defined in nested form so that this file can be loaded on its own: the
# compact "class HXLReader::HXLFormatError" form raises NameError unless
# HXLReader has already been defined.
class HXLReader

  # Raised when the input is not valid HXL, e.g. when HXLReader.foreach
  # reaches the end of the file without finding a hashtag row.
  class HXLFormatError < StandardError
  end

end
@@ -0,0 +1,27 @@
1
+ require 'csv'
2
+
3
# Defined in nested form so that this file can be loaded on its own: the
# compact "class HXLReader::HXLRow" form raises NameError unless HXLReader
# has already been defined.
class HXLReader

  # An iterable row of HXL value objects
  #
  # Behaves like a CSV::Row and additionally records the logical row number
  # and the row number in the source CSV.
  class HXLRow < CSV::Row

    attr_reader :row_number, :source_row_number

    # @param headers [Array] header of each field, passed on to CSV::Row
    # @param fields [Array] field values, passed on to CSV::Row
    # @param header_row [Boolean] passed on to CSV::Row
    # @param row_number [Integer, nil] logical row number
    # @param source_row_number [Integer, nil] row number in the source CSV
    def initialize(headers, fields, header_row = false, row_number = nil, source_row_number = nil)
      super(headers, fields, header_row)
      @row_number = row_number
      @source_row_number = source_row_number
    end

    # Multi-line debug representation listing both row numbers followed by
    # one line per entry of the row.
    #
    # @return [String]
    def to_s
      # NOTE(review): @row is the internal list kept by CSV::Row, presumably
      # [header, field] pairs; confirm against the csv version in use.
      entries = @row.map(&:to_s).join("\n ")

      "<HXLRow\n rowNumber: #{@row_number}\n sourceRowNumber: #{@source_row_number}\n#{entries}\n>"
    end

  end

end
@@ -0,0 +1,45 @@
1
# Defined in nested form so that this file can be loaded on its own: the
# compact "class HXLReader::HXLTableSpec" form raises NameError unless
# HXLReader has already been defined.
class HXLReader

  # Table metadata for parsing a HXL dataset
  #
  # Holds one column spec per source CSV column, in source order. A spec
  # whose fixed_column is set describes a disaggregated column.
  class HXLTableSpec

    attr_reader :col_specs

    def initialize
      @col_specs = []
    end

    # Append the spec of the next source column.
    #
    # @param col_spec [HXLColSpec]
    # @return [Array] the updated list of column specs
    def push(col_spec)
      @col_specs.push col_spec
    end

    # Hashtags of the logical columns, in output order.
    #
    # Untagged columns are omitted. All disaggregated columns together
    # contribute a single pair of headers (fixed tag, then value tag) at the
    # position of the first one.
    #
    # @return [Array<String>]
    def hxl_headers
      seen_fixed = false

      @col_specs.each_with_object([]) do |spec, headers|
        if spec.fixed_column
          # Only the first disaggregated column contributes headers
          next if seen_fixed

          seen_fixed = true
          headers << spec.fixed_column.hxl_tag
        end

        tag = spec.column.hxl_tag
        headers << tag unless tag.nil?
      end
    end

    # @return [Integer] number of disaggregated columns
    def get_disaggregation_count
      @col_specs.count { |col_spec| col_spec.fixed_column }
    end

    # Translate the ordinal of a disaggregated column into its index in the
    # source CSV.
    #
    # @param disaggregation_position [Integer] zero-based ordinal among the
    #   disaggregated columns
    # @return [Integer] source column index, or -1 if there is no such column
    def get_raw_position(disaggregation_position)
      remaining = disaggregation_position

      @col_specs.each_with_index do |col_spec, index|
        remaining -= 1 if col_spec.fixed_column

        return index if remaining < 0
      end

      -1
    end

  end

end
@@ -0,0 +1,12 @@
1
# Defined in nested form so that this file can be loaded on its own: the
# compact "class HXLReader::HXLValue" form raises NameError unless HXLReader
# has already been defined.
class HXLReader

  # A single HXL value at the intersection of a row and column
  class HXLValue

    attr_reader :column, :value, :col_num, :source_col_num

    # @param column [HXLColumn] the column the value belongs to
    # @param value [Object] the cell content
    # @param col_num [Integer] column number within the logical HXL row
    # @param source_col_num [Integer] column number within the source CSV row
    def initialize(column, value, col_num, source_col_num)
      @column = column
      @value = value
      @col_num = col_num
      @source_col_num = source_col_num
    end

  end

end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: libhxl-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ben Rudolph
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rspec
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '3.1'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '3.1'
27
+ description: A simple gem to parse your HXL files
28
+ email: rudolphben@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/libhxl-ruby.rb
34
+ - lib/libhxl-ruby/hxl_col_spec.rb
35
+ - lib/libhxl-ruby/hxl_column.rb
36
+ - lib/libhxl-ruby/hxl_format_error.rb
37
+ - lib/libhxl-ruby/hxl_row.rb
38
+ - lib/libhxl-ruby/hxl_table_spec.rb
39
+ - lib/libhxl-ruby/hxl_value.rb
40
+ homepage: https://github.com/benrudolph/libhxl-ruby
41
+ licenses:
42
+ - MIT
43
+ metadata: {}
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 2.4.2
61
+ signing_key:
62
+ specification_version: 4
63
+ summary: A HXL parser for ruby
64
+ test_files: []
65
+ has_rdoc: