ruby_px 0.2.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +3 -2
- data/CHANGELOG.md +14 -0
- data/Gemfile +2 -0
- data/README.md +1 -1
- data/Rakefile +5 -3
- data/bin/console +4 -3
- data/lib/ruby_px.rb +2 -0
- data/lib/ruby_px/dataset.rb +45 -44
- data/lib/ruby_px/dataset/data.rb +48 -0
- data/ruby_px.gemspec +19 -18
- metadata +16 -23
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9abe10d6ad1aed7bb9d28535671457cad398d7c17cab9c9bc49376dd46be5a3a
|
4
|
+
data.tar.gz: 95e9b594074e45eaa34e319c2ef58fc73edf4e5f6ceee9ae7c67bb2919407425
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff283ea31198ae6fd560b591ac6c1d7dc649a148a61a3a4570953c380c22f4c667fe15a569454df1649d744845b62734051d89863e7e8c523e2f270ea77b3762
|
7
|
+
data.tar.gz: a36341f8e244874a2d588b0e28319c43da1f9f8b2488007ea79f1cccf16be8db544bf59839d6c1dc8f495e0581e2f3341922c41df9ef6a3d907f57748a0e31f2
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# CHANGELOG
|
2
|
+
|
3
|
+
## 0.7.0 (2021-03-04)
|
4
|
+
|
5
|
+
- Performance in large datasets [#9](https://github.com/PopulateTools/ruby_px/pull/9)
|
6
|
+
|
7
|
+
## 0.6.0 (2020-04-28)
|
8
|
+
|
9
|
+
- Nice badge with the current version
|
10
|
+
- Added Changelog!
|
11
|
+
- Updated dependencies
|
12
|
+
- Fix multilingual VALUES and equals signs in values [#7](https://github.com/PopulateTools/ruby_px/pull/7)
|
13
|
+
- Use Rubocop to check syntax
|
14
|
+
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# RubyPx
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/ruby_px.svg)](https://badge.fury.io/rb/ruby_px)
|
3
4
|
[![Build Status](https://travis-ci.org/PopulateTools/ruby_px.svg?branch=master)](https://travis-ci.org/PopulateTools/ruby_px)
|
4
5
|
|
5
6
|
Work with PC-Axis files using Ruby.
|
@@ -82,7 +83,6 @@ dataset.data('edad (año a año)' => 'Total', 'sexo' => 'Ambos sexos')
|
|
82
83
|
|
83
84
|
## TODO
|
84
85
|
|
85
|
-
- Allow to receive an URL as an argument
|
86
86
|
- Refactor
|
87
87
|
- Test the gem with more files
|
88
88
|
- Speed-up the parsing time
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
|
4
|
+
require 'bundler/setup'
|
5
|
+
require_relative '../lib/ruby_px'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "pc-axis/dataset"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start
|
data/lib/ruby_px.rb
CHANGED
data/lib/ruby_px/dataset.rb
CHANGED
@@ -1,23 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'open-uri'
|
4
|
+
|
1
5
|
module RubyPx
|
2
6
|
class Dataset
|
7
|
+
require 'ruby_px/dataset/data'
|
8
|
+
|
3
9
|
attr_reader :headings, :stubs
|
4
10
|
|
5
|
-
METADATA_RECORDS = [
|
11
|
+
METADATA_RECORDS = %w[TITLE UNITS SOURCE CONTACT LAST-UPDATED CREATION-DATE].freeze
|
6
12
|
HEADING_RECORD = 'HEADING'
|
7
13
|
STUB_RECORD = 'STUB'
|
8
14
|
|
9
|
-
def initialize(
|
10
|
-
if !File.readable?(file)
|
11
|
-
raise "File #{file} not readable"
|
12
|
-
end
|
13
|
-
|
15
|
+
def initialize(resource_uri)
|
14
16
|
@metadata = {}
|
15
17
|
@headings = []
|
16
18
|
@stubs = []
|
17
19
|
@values = {}
|
18
|
-
@data =
|
20
|
+
@data = Data.new
|
19
21
|
|
20
|
-
|
22
|
+
parse_resource(resource_uri)
|
21
23
|
end
|
22
24
|
|
23
25
|
def title
|
@@ -54,8 +56,10 @@ module RubyPx
|
|
54
56
|
|
55
57
|
def data(options)
|
56
58
|
# Validate parameters
|
57
|
-
options.each do |k,v|
|
58
|
-
|
59
|
+
options.each do |k, v|
|
60
|
+
unless dimension(k).include?(v)
|
61
|
+
raise "Invalid value #{v} for dimension #{k}"
|
62
|
+
end
|
59
63
|
end
|
60
64
|
|
61
65
|
# Return a single value
|
@@ -67,20 +71,20 @@ module RubyPx
|
|
67
71
|
|
68
72
|
# positions are i, j, k
|
69
73
|
positions = (stubs + headings).map do |dimension_name|
|
70
|
-
|
74
|
+
dimension(dimension_name).index(options[dimension_name])
|
71
75
|
end
|
72
76
|
|
73
77
|
# dimension_sizes are from all dimensions except the first one
|
74
78
|
dimension_sizes = (stubs + headings)[1..-1].map do |dimension_name|
|
75
|
-
|
79
|
+
dimension(dimension_name).length
|
76
80
|
end
|
77
81
|
|
78
82
|
positions.each_with_index do |p, i|
|
79
83
|
d = dimension_sizes[i..-1].reduce(&:*)
|
80
|
-
offset += (d ? p*d : p)
|
84
|
+
offset += (d ? p * d : p)
|
81
85
|
end
|
82
86
|
|
83
|
-
|
87
|
+
@data.at(offset)
|
84
88
|
|
85
89
|
# Return an array of options
|
86
90
|
elsif options.length == dimensions.length - 1
|
@@ -91,30 +95,31 @@ module RubyPx
|
|
91
95
|
result << data(options.merge(missing_dimension => dimension_value))
|
92
96
|
end
|
93
97
|
|
94
|
-
|
98
|
+
result
|
95
99
|
else
|
96
|
-
raise
|
100
|
+
raise 'Not implented yet, sorry'
|
97
101
|
end
|
98
102
|
end
|
99
103
|
|
100
104
|
def inspect
|
101
|
-
"#<#{self.class.name}:#{
|
105
|
+
"#<#{self.class.name}:#{object_id}>"
|
102
106
|
end
|
103
107
|
|
104
108
|
private
|
105
109
|
|
106
|
-
def
|
107
|
-
|
110
|
+
def parse_resource(resource_uri)
|
111
|
+
open(resource_uri).each_line do |line|
|
108
112
|
parse_line(line.chomp)
|
109
113
|
end
|
110
|
-
|
114
|
+
|
115
|
+
true
|
111
116
|
end
|
112
117
|
|
113
118
|
def parse_line(line)
|
114
|
-
@line = line
|
119
|
+
@line = line.force_encoding('utf-8').encode('utf-8')
|
115
120
|
|
116
121
|
if @current_record.nil?
|
117
|
-
key, value = line.
|
122
|
+
key, value = line.split('=', 2)
|
118
123
|
set_current_record(key)
|
119
124
|
else
|
120
125
|
value = line
|
@@ -123,27 +128,25 @@ module RubyPx
|
|
123
128
|
return if @current_record.nil? || value.nil?
|
124
129
|
|
125
130
|
if @type == :data
|
126
|
-
value = value.split(
|
131
|
+
value = value.split(/[\ ;,\t]/).delete_if(&:blank?).each(&:strip)
|
127
132
|
|
128
|
-
add_value_to_bucket(bucket,value) unless value == [';']
|
133
|
+
add_value_to_bucket(bucket, value) unless value == [';']
|
129
134
|
else
|
130
135
|
# First format: "\"20141201\";"
|
131
136
|
if value =~ /\A\"([^"]+)\";\z/
|
132
137
|
value = value.match(/\A\"([^"]+)\";\z/)[1]
|
133
138
|
add_value_to_bucket(bucket, value.strip)
|
134
139
|
|
135
|
-
|
140
|
+
# Second format: "Ambos sexos","Hombres","Mujeres";
|
136
141
|
elsif value =~ /\"([^"]+)\",?/
|
137
|
-
value = value.split(/\"([^"]+)\",?;?/).delete_if
|
142
|
+
value = value.split(/\"([^"]+)\",?;?/).delete_if(&:blank?).each(&:strip)
|
138
143
|
add_value_to_bucket(bucket, value)
|
139
144
|
end
|
140
145
|
end
|
141
146
|
|
142
147
|
# If we see a ; at the end of the line, close out the record so we
|
143
148
|
# expect a new record.
|
144
|
-
if line[-1..-1] ==
|
145
|
-
@current_record = nil
|
146
|
-
end
|
149
|
+
@current_record = nil if line[-1..-1] == ';'
|
147
150
|
end
|
148
151
|
|
149
152
|
def set_current_record(key)
|
@@ -156,7 +159,7 @@ module RubyPx
|
|
156
159
|
elsif key == STUB_RECORD
|
157
160
|
@type = :stubs
|
158
161
|
key
|
159
|
-
elsif key =~ /\AVALUES/
|
162
|
+
elsif key =~ /\AVALUES/ && key !~ /\[\w\w\]/
|
160
163
|
@type = :values
|
161
164
|
key.match(/\"([^"]+)\"/)[1]
|
162
165
|
elsif key =~ /\ADATA/
|
@@ -175,22 +178,20 @@ module RubyPx
|
|
175
178
|
elsif @type == :headings || @type == :stubs
|
176
179
|
bucket << value
|
177
180
|
bucket.flatten!
|
178
|
-
|
179
|
-
if
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
if
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
end
|
181
|
+
elsif bucket.is_a?(Hash)
|
182
|
+
if value.is_a?(Array)
|
183
|
+
value = value.map(&:strip)
|
184
|
+
elsif value.is_a?(String)
|
185
|
+
value.strip!
|
186
|
+
end
|
187
|
+
if bucket[@current_record].nil?
|
188
|
+
value = Array.wrap(value) if @type == :values
|
189
|
+
bucket[@current_record] = value
|
190
|
+
else
|
191
|
+
bucket[@current_record].concat([value])
|
192
|
+
bucket[@current_record].flatten!
|
191
193
|
end
|
192
194
|
end
|
193
195
|
end
|
194
|
-
|
195
196
|
end
|
196
197
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module RubyPx
|
2
|
+
class Dataset
|
3
|
+
class Data
|
4
|
+
|
5
|
+
CHUNK_SIZE = 5_000
|
6
|
+
attr_accessor :current_chunk_index
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@current_chunk_index = 0
|
10
|
+
end
|
11
|
+
|
12
|
+
def at index
|
13
|
+
chunk_index = index/CHUNK_SIZE
|
14
|
+
index_inside_chunk = index%CHUNK_SIZE
|
15
|
+
|
16
|
+
get_chunk(chunk_index)[index_inside_chunk]
|
17
|
+
end
|
18
|
+
|
19
|
+
def concat array
|
20
|
+
current_chunk.concat(array)
|
21
|
+
if current_chunk.size > CHUNK_SIZE
|
22
|
+
excess = current_chunk.pop(current_chunk.size-CHUNK_SIZE)
|
23
|
+
self.current_chunk_index += 1
|
24
|
+
concat(excess)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def indexes_count
|
29
|
+
self.current_chunk_index+1
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
|
35
|
+
def current_chunk
|
36
|
+
current = instance_variable_get("@chunk_#{self.current_chunk_index}")
|
37
|
+
return current if current
|
38
|
+
|
39
|
+
instance_variable_set("@chunk_#{self.current_chunk_index}", [])
|
40
|
+
end
|
41
|
+
|
42
|
+
def get_chunk chunk_index
|
43
|
+
instance_variable_get("@chunk_#{chunk_index}")
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
data/ruby_px.gemspec
CHANGED
@@ -1,34 +1,35 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
|
5
6
|
Gem::Specification.new do |spec|
|
6
|
-
spec.name =
|
7
|
-
spec.version = '0.
|
8
|
-
spec.authors = [
|
9
|
-
spec.email = [
|
7
|
+
spec.name = 'ruby_px'
|
8
|
+
spec.version = '0.7.0'
|
9
|
+
spec.authors = ['Fernando Blat']
|
10
|
+
spec.email = ['fernando@blat.es']
|
10
11
|
|
11
|
-
spec.summary =
|
12
|
-
spec.description =
|
13
|
-
spec.homepage =
|
14
|
-
spec.license =
|
12
|
+
spec.summary = 'Read PC-Axis files using Ruby'
|
13
|
+
spec.description = 'Read PC-Axis files using Ruby'
|
14
|
+
spec.homepage = 'https://github.com/PopulateTools/ruby_px'
|
15
|
+
spec.license = 'MIT'
|
15
16
|
|
16
17
|
# Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
|
17
18
|
# delete this section to allow pushing this gem to any host.
|
18
19
|
if spec.respond_to?(:metadata)
|
19
|
-
spec.metadata['allowed_push_host'] =
|
20
|
+
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
20
21
|
else
|
21
|
-
raise
|
22
|
+
raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
|
22
23
|
end
|
23
24
|
|
24
25
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
|
-
spec.bindir =
|
26
|
+
spec.bindir = 'exe'
|
26
27
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
27
|
-
spec.require_paths = [
|
28
|
+
spec.require_paths = ['lib']
|
28
29
|
|
29
|
-
spec.add_runtime_dependency
|
30
|
+
spec.add_runtime_dependency 'activesupport', '>= 6.0'
|
30
31
|
|
31
|
-
spec.add_development_dependency
|
32
|
-
spec.add_development_dependency
|
33
|
-
spec.add_development_dependency
|
32
|
+
spec.add_development_dependency 'bundler'
|
33
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
34
|
+
spec.add_development_dependency 'rspec', '~> 3.9'
|
34
35
|
end
|
metadata
CHANGED
@@ -1,88 +1,82 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_px
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Fernando Blat
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '4.2'
|
20
17
|
- - ">="
|
21
18
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
19
|
+
version: '6.0'
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
26
23
|
requirements:
|
27
|
-
- - "~>"
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: '4.2'
|
30
24
|
- - ">="
|
31
25
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
26
|
+
version: '6.0'
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: bundler
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
36
30
|
requirements:
|
37
|
-
- - "
|
31
|
+
- - ">="
|
38
32
|
- !ruby/object:Gem::Version
|
39
|
-
version: '
|
33
|
+
version: '0'
|
40
34
|
type: :development
|
41
35
|
prerelease: false
|
42
36
|
version_requirements: !ruby/object:Gem::Requirement
|
43
37
|
requirements:
|
44
|
-
- - "
|
38
|
+
- - ">="
|
45
39
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
40
|
+
version: '0'
|
47
41
|
- !ruby/object:Gem::Dependency
|
48
42
|
name: rake
|
49
43
|
requirement: !ruby/object:Gem::Requirement
|
50
44
|
requirements:
|
51
45
|
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
|
-
version: '
|
47
|
+
version: '13.0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
51
|
requirements:
|
58
52
|
- - "~>"
|
59
53
|
- !ruby/object:Gem::Version
|
60
|
-
version: '
|
54
|
+
version: '13.0'
|
61
55
|
- !ruby/object:Gem::Dependency
|
62
56
|
name: rspec
|
63
57
|
requirement: !ruby/object:Gem::Requirement
|
64
58
|
requirements:
|
65
59
|
- - "~>"
|
66
60
|
- !ruby/object:Gem::Version
|
67
|
-
version: '3.
|
61
|
+
version: '3.9'
|
68
62
|
type: :development
|
69
63
|
prerelease: false
|
70
64
|
version_requirements: !ruby/object:Gem::Requirement
|
71
65
|
requirements:
|
72
66
|
- - "~>"
|
73
67
|
- !ruby/object:Gem::Version
|
74
|
-
version: '3.
|
68
|
+
version: '3.9'
|
75
69
|
description: Read PC-Axis files using Ruby
|
76
70
|
email:
|
77
|
-
-
|
71
|
+
- fernando@blat.es
|
78
72
|
executables: []
|
79
73
|
extensions: []
|
80
74
|
extra_rdoc_files: []
|
81
75
|
files:
|
82
76
|
- ".gitignore"
|
83
77
|
- ".rspec"
|
84
|
-
- ".ruby-version"
|
85
78
|
- ".travis.yml"
|
79
|
+
- CHANGELOG.md
|
86
80
|
- CODE_OF_CONDUCT.md
|
87
81
|
- Gemfile
|
88
82
|
- LICENSE.txt
|
@@ -92,6 +86,7 @@ files:
|
|
92
86
|
- bin/setup
|
93
87
|
- lib/ruby_px.rb
|
94
88
|
- lib/ruby_px/dataset.rb
|
89
|
+
- lib/ruby_px/dataset/data.rb
|
95
90
|
- ruby_px.gemspec
|
96
91
|
homepage: https://github.com/PopulateTools/ruby_px
|
97
92
|
licenses:
|
@@ -113,10 +108,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
108
|
- !ruby/object:Gem::Version
|
114
109
|
version: '0'
|
115
110
|
requirements: []
|
116
|
-
|
117
|
-
rubygems_version: 2.4.5.1
|
111
|
+
rubygems_version: 3.1.2
|
118
112
|
signing_key:
|
119
113
|
specification_version: 4
|
120
114
|
summary: Read PC-Axis files using Ruby
|
121
115
|
test_files: []
|
122
|
-
has_rdoc:
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.2.3
|