ruby_px 0.2.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +3 -2
- data/CHANGELOG.md +14 -0
- data/Gemfile +2 -0
- data/README.md +1 -1
- data/Rakefile +5 -3
- data/bin/console +4 -3
- data/lib/ruby_px.rb +2 -0
- data/lib/ruby_px/dataset.rb +45 -44
- data/lib/ruby_px/dataset/data.rb +48 -0
- data/ruby_px.gemspec +19 -18
- metadata +16 -23
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9abe10d6ad1aed7bb9d28535671457cad398d7c17cab9c9bc49376dd46be5a3a
|
4
|
+
data.tar.gz: 95e9b594074e45eaa34e319c2ef58fc73edf4e5f6ceee9ae7c67bb2919407425
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff283ea31198ae6fd560b591ac6c1d7dc649a148a61a3a4570953c380c22f4c667fe15a569454df1649d744845b62734051d89863e7e8c523e2f270ea77b3762
|
7
|
+
data.tar.gz: a36341f8e244874a2d588b0e28319c43da1f9f8b2488007ea79f1cccf16be8db544bf59839d6c1dc8f495e0581e2f3341922c41df9ef6a3d907f57748a0e31f2
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# CHANGELOG
|
2
|
+
|
3
|
+
## 0.7.0 (2021-03-04)
|
4
|
+
|
5
|
+
- Performance in large datasets [#9](https://github.com/PopulateTools/ruby_px/pull/9)
|
6
|
+
|
7
|
+
## 0.6.0 (2020-04-28)
|
8
|
+
|
9
|
+
- Nice badge with the current version
|
10
|
+
- Added Changelog!
|
11
|
+
- Updated dependencies
|
12
|
+
- Fix multilingual VALUES and equals signs in values [#7](https://github.com/PopulateTools/ruby_px/pull/7)
|
13
|
+
- Use Rubocop to check syntax
|
14
|
+
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# RubyPx
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/ruby_px.svg)](https://badge.fury.io/rb/ruby_px)
|
3
4
|
[![Build Status](https://travis-ci.org/PopulateTools/ruby_px.svg?branch=master)](https://travis-ci.org/PopulateTools/ruby_px)
|
4
5
|
|
5
6
|
Work with PC-Axis files using Ruby.
|
@@ -82,7 +83,6 @@ dataset.data('edad (año a año)' => 'Total', 'sexo' => 'Ambos sexos')
|
|
82
83
|
|
83
84
|
## TODO
|
84
85
|
|
85
|
-
- Allow to receive an URL as an argument
|
86
86
|
- Refactor
|
87
87
|
- Test the gem with more files
|
88
88
|
- Speed-up the parsing time
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
|
4
|
+
require 'bundler/setup'
|
5
|
+
require_relative '../lib/ruby_px'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "pc-axis/dataset"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start
|
data/lib/ruby_px.rb
CHANGED
data/lib/ruby_px/dataset.rb
CHANGED
@@ -1,23 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'open-uri'
|
4
|
+
|
1
5
|
module RubyPx
|
2
6
|
class Dataset
|
7
|
+
require 'ruby_px/dataset/data'
|
8
|
+
|
3
9
|
attr_reader :headings, :stubs
|
4
10
|
|
5
|
-
METADATA_RECORDS = [
|
11
|
+
METADATA_RECORDS = %w[TITLE UNITS SOURCE CONTACT LAST-UPDATED CREATION-DATE].freeze
|
6
12
|
HEADING_RECORD = 'HEADING'
|
7
13
|
STUB_RECORD = 'STUB'
|
8
14
|
|
9
|
-
def initialize(
|
10
|
-
if !File.readable?(file)
|
11
|
-
raise "File #{file} not readable"
|
12
|
-
end
|
13
|
-
|
15
|
+
def initialize(resource_uri)
|
14
16
|
@metadata = {}
|
15
17
|
@headings = []
|
16
18
|
@stubs = []
|
17
19
|
@values = {}
|
18
|
-
@data =
|
20
|
+
@data = Data.new
|
19
21
|
|
20
|
-
|
22
|
+
parse_resource(resource_uri)
|
21
23
|
end
|
22
24
|
|
23
25
|
def title
|
@@ -54,8 +56,10 @@ module RubyPx
|
|
54
56
|
|
55
57
|
def data(options)
|
56
58
|
# Validate parameters
|
57
|
-
options.each do |k,v|
|
58
|
-
|
59
|
+
options.each do |k, v|
|
60
|
+
unless dimension(k).include?(v)
|
61
|
+
raise "Invalid value #{v} for dimension #{k}"
|
62
|
+
end
|
59
63
|
end
|
60
64
|
|
61
65
|
# Return a single value
|
@@ -67,20 +71,20 @@ module RubyPx
|
|
67
71
|
|
68
72
|
# positions are i, j, k
|
69
73
|
positions = (stubs + headings).map do |dimension_name|
|
70
|
-
|
74
|
+
dimension(dimension_name).index(options[dimension_name])
|
71
75
|
end
|
72
76
|
|
73
77
|
# dimension_sizes are from all dimensions except the first one
|
74
78
|
dimension_sizes = (stubs + headings)[1..-1].map do |dimension_name|
|
75
|
-
|
79
|
+
dimension(dimension_name).length
|
76
80
|
end
|
77
81
|
|
78
82
|
positions.each_with_index do |p, i|
|
79
83
|
d = dimension_sizes[i..-1].reduce(&:*)
|
80
|
-
offset += (d ? p*d : p)
|
84
|
+
offset += (d ? p * d : p)
|
81
85
|
end
|
82
86
|
|
83
|
-
|
87
|
+
@data.at(offset)
|
84
88
|
|
85
89
|
# Return an array of options
|
86
90
|
elsif options.length == dimensions.length - 1
|
@@ -91,30 +95,31 @@ module RubyPx
|
|
91
95
|
result << data(options.merge(missing_dimension => dimension_value))
|
92
96
|
end
|
93
97
|
|
94
|
-
|
98
|
+
result
|
95
99
|
else
|
96
|
-
raise
|
100
|
+
raise 'Not implented yet, sorry'
|
97
101
|
end
|
98
102
|
end
|
99
103
|
|
100
104
|
def inspect
|
101
|
-
"#<#{self.class.name}:#{
|
105
|
+
"#<#{self.class.name}:#{object_id}>"
|
102
106
|
end
|
103
107
|
|
104
108
|
private
|
105
109
|
|
106
|
-
def
|
107
|
-
|
110
|
+
def parse_resource(resource_uri)
|
111
|
+
open(resource_uri).each_line do |line|
|
108
112
|
parse_line(line.chomp)
|
109
113
|
end
|
110
|
-
|
114
|
+
|
115
|
+
true
|
111
116
|
end
|
112
117
|
|
113
118
|
def parse_line(line)
|
114
|
-
@line = line
|
119
|
+
@line = line.force_encoding('utf-8').encode('utf-8')
|
115
120
|
|
116
121
|
if @current_record.nil?
|
117
|
-
key, value = line.
|
122
|
+
key, value = line.split('=', 2)
|
118
123
|
set_current_record(key)
|
119
124
|
else
|
120
125
|
value = line
|
@@ -123,27 +128,25 @@ module RubyPx
|
|
123
128
|
return if @current_record.nil? || value.nil?
|
124
129
|
|
125
130
|
if @type == :data
|
126
|
-
value = value.split(
|
131
|
+
value = value.split(/[\ ;,\t]/).delete_if(&:blank?).each(&:strip)
|
127
132
|
|
128
|
-
add_value_to_bucket(bucket,value) unless value == [';']
|
133
|
+
add_value_to_bucket(bucket, value) unless value == [';']
|
129
134
|
else
|
130
135
|
# First format: "\"20141201\";"
|
131
136
|
if value =~ /\A\"([^"]+)\";\z/
|
132
137
|
value = value.match(/\A\"([^"]+)\";\z/)[1]
|
133
138
|
add_value_to_bucket(bucket, value.strip)
|
134
139
|
|
135
|
-
|
140
|
+
# Second format: "Ambos sexos","Hombres","Mujeres";
|
136
141
|
elsif value =~ /\"([^"]+)\",?/
|
137
|
-
value = value.split(/\"([^"]+)\",?;?/).delete_if
|
142
|
+
value = value.split(/\"([^"]+)\",?;?/).delete_if(&:blank?).each(&:strip)
|
138
143
|
add_value_to_bucket(bucket, value)
|
139
144
|
end
|
140
145
|
end
|
141
146
|
|
142
147
|
# If we see a ; at the end of the line, close out the record so we
|
143
148
|
# expect a new record.
|
144
|
-
if line[-1..-1] ==
|
145
|
-
@current_record = nil
|
146
|
-
end
|
149
|
+
@current_record = nil if line[-1..-1] == ';'
|
147
150
|
end
|
148
151
|
|
149
152
|
def set_current_record(key)
|
@@ -156,7 +159,7 @@ module RubyPx
|
|
156
159
|
elsif key == STUB_RECORD
|
157
160
|
@type = :stubs
|
158
161
|
key
|
159
|
-
elsif key =~ /\AVALUES/
|
162
|
+
elsif key =~ /\AVALUES/ && key !~ /\[\w\w\]/
|
160
163
|
@type = :values
|
161
164
|
key.match(/\"([^"]+)\"/)[1]
|
162
165
|
elsif key =~ /\ADATA/
|
@@ -175,22 +178,20 @@ module RubyPx
|
|
175
178
|
elsif @type == :headings || @type == :stubs
|
176
179
|
bucket << value
|
177
180
|
bucket.flatten!
|
178
|
-
|
179
|
-
if
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
if
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
end
|
181
|
+
elsif bucket.is_a?(Hash)
|
182
|
+
if value.is_a?(Array)
|
183
|
+
value = value.map(&:strip)
|
184
|
+
elsif value.is_a?(String)
|
185
|
+
value.strip!
|
186
|
+
end
|
187
|
+
if bucket[@current_record].nil?
|
188
|
+
value = Array.wrap(value) if @type == :values
|
189
|
+
bucket[@current_record] = value
|
190
|
+
else
|
191
|
+
bucket[@current_record].concat([value])
|
192
|
+
bucket[@current_record].flatten!
|
191
193
|
end
|
192
194
|
end
|
193
195
|
end
|
194
|
-
|
195
196
|
end
|
196
197
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true

module RubyPx
  class Dataset
    # Append-only store for the values of a dataset's DATA record.
    #
    # Values are kept in a list of fixed-capacity chunks (CHUNK_SIZE elements
    # each) instead of one flat array. A flat offset is translated into a
    # (chunk, position-inside-chunk) pair on lookup.
    class Data
      # Maximum number of values held by a single chunk.
      CHUNK_SIZE = 5_000

      # Index of the chunk that is currently being filled by #concat.
      attr_accessor :current_chunk_index

      def initialize
        @current_chunk_index = 0
        @chunks = []
      end

      # Returns the value stored at the flat, zero-based offset +index+.
      #
      # @param index [Integer] flat offset into the stored values
      # @return [Object, nil] the stored value, or nil when +index+ is
      #   negative or points past the values stored so far (including
      #   offsets that fall in a chunk that was never created)
      def at(index)
        # Negative offsets are treated as out of range rather than being
        # allowed to wrap around to the end of the chunk list.
        return nil if index.negative?

        chunk_index, index_inside_chunk = index.divmod(CHUNK_SIZE)
        get_chunk(chunk_index)&.at(index_inside_chunk)
      end

      # Appends the elements of +array+, spilling into new chunks as needed.
      #
      # The chunk index only advances when there is a further element to
      # store, so a chunk that is exactly full remains the current chunk
      # until the next element arrives.
      #
      # @param array [Array] values to append, in order
      # @return [nil]
      def concat(array)
        offset = 0
        while offset < array.size
          # Move on only when the current chunk has no room left.
          self.current_chunk_index += 1 if current_chunk.size >= CHUNK_SIZE

          room = CHUNK_SIZE - current_chunk.size
          current_chunk.concat(array[offset, room])
          # May overshoot array.size on the last pass; the loop then ends.
          offset += room
        end

        nil
      end

      # Number of chunks in use, counting the one currently being filled.
      #
      # @return [Integer]
      def indexes_count
        current_chunk_index + 1
      end

      private

      # Chunk currently being filled, created empty on first access.
      def current_chunk
        @chunks[current_chunk_index] ||= []
      end

      # Chunk stored at +chunk_index+, or nil when it was never created.
      def get_chunk(chunk_index)
        @chunks[chunk_index]
      end
    end
  end
end
|
data/ruby_px.gemspec
CHANGED
@@ -1,34 +1,35 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
|
5
6
|
Gem::Specification.new do |spec|
|
6
|
-
spec.name =
|
7
|
-
spec.version = '0.
|
8
|
-
spec.authors = [
|
9
|
-
spec.email = [
|
7
|
+
spec.name = 'ruby_px'
|
8
|
+
spec.version = '0.7.0'
|
9
|
+
spec.authors = ['Fernando Blat']
|
10
|
+
spec.email = ['fernando@blat.es']
|
10
11
|
|
11
|
-
spec.summary =
|
12
|
-
spec.description =
|
13
|
-
spec.homepage =
|
14
|
-
spec.license =
|
12
|
+
spec.summary = 'Read PC-Axis files using Ruby'
|
13
|
+
spec.description = 'Read PC-Axis files using Ruby'
|
14
|
+
spec.homepage = 'https://github.com/PopulateTools/ruby_px'
|
15
|
+
spec.license = 'MIT'
|
15
16
|
|
16
17
|
# Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
|
17
18
|
# delete this section to allow pushing this gem to any host.
|
18
19
|
if spec.respond_to?(:metadata)
|
19
|
-
spec.metadata['allowed_push_host'] =
|
20
|
+
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
20
21
|
else
|
21
|
-
raise
|
22
|
+
raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
|
22
23
|
end
|
23
24
|
|
24
25
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
|
-
spec.bindir =
|
26
|
+
spec.bindir = 'exe'
|
26
27
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
27
|
-
spec.require_paths = [
|
28
|
+
spec.require_paths = ['lib']
|
28
29
|
|
29
|
-
spec.add_runtime_dependency
|
30
|
+
spec.add_runtime_dependency 'activesupport', '>= 6.0'
|
30
31
|
|
31
|
-
spec.add_development_dependency
|
32
|
-
spec.add_development_dependency
|
33
|
-
spec.add_development_dependency
|
32
|
+
spec.add_development_dependency 'bundler'
|
33
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
34
|
+
spec.add_development_dependency 'rspec', '~> 3.9'
|
34
35
|
end
|
metadata
CHANGED
@@ -1,88 +1,82 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_px
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Fernando Blat
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '4.2'
|
20
17
|
- - ">="
|
21
18
|
- !ruby/object:Gem::Version
|
22
|
-
version:
|
19
|
+
version: '6.0'
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
26
23
|
requirements:
|
27
|
-
- - "~>"
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: '4.2'
|
30
24
|
- - ">="
|
31
25
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
26
|
+
version: '6.0'
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: bundler
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
36
30
|
requirements:
|
37
|
-
- - "
|
31
|
+
- - ">="
|
38
32
|
- !ruby/object:Gem::Version
|
39
|
-
version: '
|
33
|
+
version: '0'
|
40
34
|
type: :development
|
41
35
|
prerelease: false
|
42
36
|
version_requirements: !ruby/object:Gem::Requirement
|
43
37
|
requirements:
|
44
|
-
- - "
|
38
|
+
- - ">="
|
45
39
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
40
|
+
version: '0'
|
47
41
|
- !ruby/object:Gem::Dependency
|
48
42
|
name: rake
|
49
43
|
requirement: !ruby/object:Gem::Requirement
|
50
44
|
requirements:
|
51
45
|
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
|
-
version: '
|
47
|
+
version: '13.0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
51
|
requirements:
|
58
52
|
- - "~>"
|
59
53
|
- !ruby/object:Gem::Version
|
60
|
-
version: '
|
54
|
+
version: '13.0'
|
61
55
|
- !ruby/object:Gem::Dependency
|
62
56
|
name: rspec
|
63
57
|
requirement: !ruby/object:Gem::Requirement
|
64
58
|
requirements:
|
65
59
|
- - "~>"
|
66
60
|
- !ruby/object:Gem::Version
|
67
|
-
version: '3.
|
61
|
+
version: '3.9'
|
68
62
|
type: :development
|
69
63
|
prerelease: false
|
70
64
|
version_requirements: !ruby/object:Gem::Requirement
|
71
65
|
requirements:
|
72
66
|
- - "~>"
|
73
67
|
- !ruby/object:Gem::Version
|
74
|
-
version: '3.
|
68
|
+
version: '3.9'
|
75
69
|
description: Read PC-Axis files using Ruby
|
76
70
|
email:
|
77
|
-
-
|
71
|
+
- fernando@blat.es
|
78
72
|
executables: []
|
79
73
|
extensions: []
|
80
74
|
extra_rdoc_files: []
|
81
75
|
files:
|
82
76
|
- ".gitignore"
|
83
77
|
- ".rspec"
|
84
|
-
- ".ruby-version"
|
85
78
|
- ".travis.yml"
|
79
|
+
- CHANGELOG.md
|
86
80
|
- CODE_OF_CONDUCT.md
|
87
81
|
- Gemfile
|
88
82
|
- LICENSE.txt
|
@@ -92,6 +86,7 @@ files:
|
|
92
86
|
- bin/setup
|
93
87
|
- lib/ruby_px.rb
|
94
88
|
- lib/ruby_px/dataset.rb
|
89
|
+
- lib/ruby_px/dataset/data.rb
|
95
90
|
- ruby_px.gemspec
|
96
91
|
homepage: https://github.com/PopulateTools/ruby_px
|
97
92
|
licenses:
|
@@ -113,10 +108,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
108
|
- !ruby/object:Gem::Version
|
114
109
|
version: '0'
|
115
110
|
requirements: []
|
116
|
-
|
117
|
-
rubygems_version: 2.4.5.1
|
111
|
+
rubygems_version: 3.1.2
|
118
112
|
signing_key:
|
119
113
|
specification_version: 4
|
120
114
|
summary: Read PC-Axis files using Ruby
|
121
115
|
test_files: []
|
122
|
-
has_rdoc:
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.2.3
|