ruby_px 0.2.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 44941ebae6202b2580d1303054c643bd3e713971
4
- data.tar.gz: 4d9b8e3bc4dc2ae1156366473137d68618f31b05
2
+ SHA256:
3
+ metadata.gz: 9abe10d6ad1aed7bb9d28535671457cad398d7c17cab9c9bc49376dd46be5a3a
4
+ data.tar.gz: 95e9b594074e45eaa34e319c2ef58fc73edf4e5f6ceee9ae7c67bb2919407425
5
5
  SHA512:
6
- metadata.gz: 915d7fa2b750f11a241d25b2188210e4cbcceb2041a6c501fe78df65f480e21bce23bf172be39d1972d910ad05fea61277706fe543124a2e2bd9d5371a5aebcf
7
- data.tar.gz: 37b7bdc3faa735d97cbd04cc341e8811a49f3340b69a8e25ed21cd150e7a0520ced342c5d1fc7b7f81da786f13c941fa48c43b122f4d91cb3ff070b2451c0361
6
+ metadata.gz: ff283ea31198ae6fd560b591ac6c1d7dc649a148a61a3a4570953c380c22f4c667fe15a569454df1649d744845b62734051d89863e7e8c523e2f270ea77b3762
7
+ data.tar.gz: a36341f8e244874a2d588b0e28319c43da1f9f8b2488007ea79f1cccf16be8db544bf59839d6c1dc8f495e0581e2f3341922c41df9ef6a3d907f57748a0e31f2
data/.gitignore CHANGED
@@ -8,3 +8,4 @@
8
8
  /spec/reports/
9
9
  /tmp/
10
10
  .byebug_history
11
+ .ruby-version
data/.travis.yml CHANGED
@@ -1,4 +1,5 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.2.3
4
- before_install: gem install bundler -v 1.10.6
3
+ - 2.7.1
4
+ - 2.6.3
5
+ before_install: gem install bundler
data/CHANGELOG.md ADDED
@@ -0,0 +1,14 @@
1
+ # CHANGELOG
2
+
3
+ ## 0.7.0 (2021-03-04)
4
+
5
+ - Performance in large datasets [#9](https://github.com/PopulateTools/ruby_px/pull/9)
6
+
7
+ ## 0.6.0 (2020-04-28)
8
+
9
+ - Nice badge with the current version
10
+ - Added Changelog!
11
+ - Updated dependencies
12
+ - Fix multilingual VALUES and equals signs in values [#7](https://github.com/PopulateTools/ruby_px/pull/7)
13
+ - Use Rubocop to check syntax
14
+
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in ruby_px.gemspec
data/README.md CHANGED
@@ -1,5 +1,6 @@
1
1
  # RubyPx
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/ruby_px.svg)](https://badge.fury.io/rb/ruby_px)
3
4
  [![Build Status](https://travis-ci.org/PopulateTools/ruby_px.svg?branch=master)](https://travis-ci.org/PopulateTools/ruby_px)
4
5
 
5
6
  Work with PC-Axis files using Ruby.
@@ -82,7 +83,6 @@ dataset.data('edad (año a año)' => 'Total', 'sexo' => 'Ambos sexos')
82
83
 
83
84
  ## TODO
84
85
 
85
- - Allow to receive an URL as an argument
86
86
  - Refactor
87
87
  - Test the gem with more files
88
88
  - Speed-up the parsing time
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
3
5
 
4
6
  RSpec::Core::RakeTask.new(:spec)
5
7
 
6
- task :default => :spec
8
+ task default: :spec
data/bin/console CHANGED
@@ -1,7 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
- require "bundler/setup"
4
- require "pc-axis/dataset"
4
+ require 'bundler/setup'
5
+ require_relative '../lib/ruby_px'
5
6
 
6
7
  # You can add fixtures and/or initialization code here to make experimenting
7
8
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +11,5 @@ require "pc-axis/dataset"
10
11
  # require "pry"
11
12
  # Pry.start
12
13
 
13
- require "irb"
14
+ require 'irb'
14
15
  IRB.start
data/lib/ruby_px.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'active_support/all'
2
4
 
3
5
  module RubyPx
data/lib/ruby_px/dataset.rb CHANGED
@@ -1,23 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'open-uri'
4
+
1
5
  module RubyPx
2
6
  class Dataset
7
+ require 'ruby_px/dataset/data'
8
+
3
9
  attr_reader :headings, :stubs
4
10
 
5
- METADATA_RECORDS = ['TITLE','UNITS','SOURCE','CONTACT','LAST-UPDATED','CREATION-DATE']
11
+ METADATA_RECORDS = %w[TITLE UNITS SOURCE CONTACT LAST-UPDATED CREATION-DATE].freeze
6
12
  HEADING_RECORD = 'HEADING'
7
13
  STUB_RECORD = 'STUB'
8
14
 
9
- def initialize(file)
10
- if !File.readable?(file)
11
- raise "File #{file} not readable"
12
- end
13
-
15
+ def initialize(resource_uri)
14
16
  @metadata = {}
15
17
  @headings = []
16
18
  @stubs = []
17
19
  @values = {}
18
- @data = []
20
+ @data = Data.new
19
21
 
20
- parse_file(file)
22
+ parse_resource(resource_uri)
21
23
  end
22
24
 
23
25
  def title
@@ -54,8 +56,10 @@ module RubyPx
54
56
 
55
57
  def data(options)
56
58
  # Validate parameters
57
- options.each do |k,v|
58
- raise "Invalid value #{v} for dimension #{k}" unless dimension(k).include?(v)
59
+ options.each do |k, v|
60
+ unless dimension(k).include?(v)
61
+ raise "Invalid value #{v} for dimension #{k}"
62
+ end
59
63
  end
60
64
 
61
65
  # Return a single value
@@ -67,20 +71,20 @@ module RubyPx
67
71
 
68
72
  # positions are i, j, k
69
73
  positions = (stubs + headings).map do |dimension_name|
70
- self.dimension(dimension_name).index(options[dimension_name])
74
+ dimension(dimension_name).index(options[dimension_name])
71
75
  end
72
76
 
73
77
  # dimension_sizes are from all dimensions except the first one
74
78
  dimension_sizes = (stubs + headings)[1..-1].map do |dimension_name|
75
- self.dimension(dimension_name).length
79
+ dimension(dimension_name).length
76
80
  end
77
81
 
78
82
  positions.each_with_index do |p, i|
79
83
  d = dimension_sizes[i..-1].reduce(&:*)
80
- offset += (d ? p*d : p)
84
+ offset += (d ? p * d : p)
81
85
  end
82
86
 
83
- return @data[offset]
87
+ @data.at(offset)
84
88
 
85
89
  # Return an array of options
86
90
  elsif options.length == dimensions.length - 1
@@ -91,30 +95,31 @@ module RubyPx
91
95
  result << data(options.merge(missing_dimension => dimension_value))
92
96
  end
93
97
 
94
- return result
98
+ result
95
99
  else
96
- raise "Not implented yet, sorry"
100
+ raise 'Not implented yet, sorry'
97
101
  end
98
102
  end
99
103
 
100
104
  def inspect
101
- "#<#{self.class.name}:#{self.object_id}>"
105
+ "#<#{self.class.name}:#{object_id}>"
102
106
  end
103
107
 
104
108
  private
105
109
 
106
- def parse_file(file)
107
- File.foreach(file) do |line|
110
+ def parse_resource(resource_uri)
111
+ open(resource_uri).each_line do |line|
108
112
  parse_line(line.chomp)
109
113
  end
110
- return true
114
+
115
+ true
111
116
  end
112
117
 
113
118
  def parse_line(line)
114
- @line = line
119
+ @line = line.force_encoding('utf-8').encode('utf-8')
115
120
 
116
121
  if @current_record.nil?
117
- key, value = line.scan(/[^\=]+/)
122
+ key, value = line.split('=', 2)
118
123
  set_current_record(key)
119
124
  else
120
125
  value = line
@@ -123,27 +128,25 @@ module RubyPx
123
128
  return if @current_record.nil? || value.nil?
124
129
 
125
130
  if @type == :data
126
- value = value.split(' ')
131
+ value = value.split(/[\ ;,\t]/).delete_if(&:blank?).each(&:strip)
127
132
 
128
- add_value_to_bucket(bucket,value) unless value == [';']
133
+ add_value_to_bucket(bucket, value) unless value == [';']
129
134
  else
130
135
  # First format: "\"20141201\";"
131
136
  if value =~ /\A\"([^"]+)\";\z/
132
137
  value = value.match(/\A\"([^"]+)\";\z/)[1]
133
138
  add_value_to_bucket(bucket, value.strip)
134
139
 
135
- # Second format: "Ambos sexos","Hombres","Mujeres";
140
+ # Second format: "Ambos sexos","Hombres","Mujeres";
136
141
  elsif value =~ /\"([^"]+)\",?/
137
- value = value.split(/\"([^"]+)\",?;?/).delete_if{ |s| s.blank? }.each(&:strip)
142
+ value = value.split(/\"([^"]+)\",?;?/).delete_if(&:blank?).each(&:strip)
138
143
  add_value_to_bucket(bucket, value)
139
144
  end
140
145
  end
141
146
 
142
147
  # If we see a ; at the end of the line, close out the record so we
143
148
  # expect a new record.
144
- if line[-1..-1] == ";"
145
- @current_record = nil
146
- end
149
+ @current_record = nil if line[-1..-1] == ';'
147
150
  end
148
151
 
149
152
  def set_current_record(key)
@@ -156,7 +159,7 @@ module RubyPx
156
159
  elsif key == STUB_RECORD
157
160
  @type = :stubs
158
161
  key
159
- elsif key =~ /\AVALUES/
162
+ elsif key =~ /\AVALUES/ && key !~ /\[\w\w\]/
160
163
  @type = :values
161
164
  key.match(/\"([^"]+)\"/)[1]
162
165
  elsif key =~ /\ADATA/
@@ -175,22 +178,20 @@ module RubyPx
175
178
  elsif @type == :headings || @type == :stubs
176
179
  bucket << value
177
180
  bucket.flatten!
178
- else
179
- if bucket.is_a?(Hash)
180
- if value.is_a?(Array)
181
- value = value.map(&:strip)
182
- elsif value.is_a?(String)
183
- value.strip!
184
- end
185
- if bucket[@current_record].nil?
186
- bucket[@current_record] = value
187
- else
188
- bucket[@current_record].concat([value])
189
- bucket[@current_record].flatten!
190
- end
181
+ elsif bucket.is_a?(Hash)
182
+ if value.is_a?(Array)
183
+ value = value.map(&:strip)
184
+ elsif value.is_a?(String)
185
+ value.strip!
186
+ end
187
+ if bucket[@current_record].nil?
188
+ value = Array.wrap(value) if @type == :values
189
+ bucket[@current_record] = value
190
+ else
191
+ bucket[@current_record].concat([value])
192
+ bucket[@current_record].flatten!
191
193
  end
192
194
  end
193
195
  end
194
-
195
196
  end
196
197
  end
data/lib/ruby_px/dataset/data.rb ADDED
@@ -0,0 +1,48 @@
1
+ module RubyPx
2
+ class Dataset
3
+ class Data
4
+
5
+ CHUNK_SIZE = 5_000
6
+ attr_accessor :current_chunk_index
7
+
8
+ def initialize
9
+ @current_chunk_index = 0
10
+ end
11
+
12
+ def at index
13
+ chunk_index = index/CHUNK_SIZE
14
+ index_inside_chunk = index%CHUNK_SIZE
15
+
16
+ get_chunk(chunk_index)[index_inside_chunk]
17
+ end
18
+
19
+ def concat array
20
+ current_chunk.concat(array)
21
+ if current_chunk.size > CHUNK_SIZE
22
+ excess = current_chunk.pop(current_chunk.size-CHUNK_SIZE)
23
+ self.current_chunk_index += 1
24
+ concat(excess)
25
+ end
26
+ end
27
+
28
+ def indexes_count
29
+ self.current_chunk_index+1
30
+ end
31
+
32
+ private
33
+
34
+
35
+ def current_chunk
36
+ current = instance_variable_get("@chunk_#{self.current_chunk_index}")
37
+ return current if current
38
+
39
+ instance_variable_set("@chunk_#{self.current_chunk_index}", [])
40
+ end
41
+
42
+ def get_chunk chunk_index
43
+ instance_variable_get("@chunk_#{chunk_index}")
44
+ end
45
+
46
+ end
47
+ end
48
+ end
data/ruby_px.gemspec CHANGED
@@ -1,34 +1,35 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
 
5
6
  Gem::Specification.new do |spec|
6
- spec.name = "ruby_px"
7
- spec.version = '0.2.0'
8
- spec.authors = ["Fernando Blat"]
9
- spec.email = ["ferblape@gmail.com"]
7
+ spec.name = 'ruby_px'
8
+ spec.version = '0.7.0'
9
+ spec.authors = ['Fernando Blat']
10
+ spec.email = ['fernando@blat.es']
10
11
 
11
- spec.summary = %q{Read PC-Axis files using Ruby}
12
- spec.description = %q{Read PC-Axis files using Ruby}
13
- spec.homepage = "https://github.com/PopulateTools/ruby_px"
14
- spec.license = "MIT"
12
+ spec.summary = 'Read PC-Axis files using Ruby'
13
+ spec.description = 'Read PC-Axis files using Ruby'
14
+ spec.homepage = 'https://github.com/PopulateTools/ruby_px'
15
+ spec.license = 'MIT'
15
16
 
16
17
  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
17
18
  # delete this section to allow pushing this gem to any host.
18
19
  if spec.respond_to?(:metadata)
19
- spec.metadata['allowed_push_host'] = "https://rubygems.org"
20
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
20
21
  else
21
- raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
22
+ raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
22
23
  end
23
24
 
24
25
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
- spec.bindir = "exe"
26
+ spec.bindir = 'exe'
26
27
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
- spec.require_paths = ["lib"]
28
+ spec.require_paths = ['lib']
28
29
 
29
- spec.add_runtime_dependency "activesupport", "~> 4.2", ">= 4.2.5"
30
+ spec.add_runtime_dependency 'activesupport', '>= 6.0'
30
31
 
31
- spec.add_development_dependency "bundler", "~> 1.10"
32
- spec.add_development_dependency "rake", "~> 10.0"
33
- spec.add_development_dependency "rspec", "~> 3.4"
32
+ spec.add_development_dependency 'bundler'
33
+ spec.add_development_dependency 'rake', '~> 13.0'
34
+ spec.add_development_dependency 'rspec', '~> 3.9'
34
35
  end
metadata CHANGED
@@ -1,88 +1,82 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_px
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fernando Blat
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-13 00:00:00.000000000 Z
11
+ date: 2021-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '4.2'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
- version: 4.2.5
19
+ version: '6.0'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '4.2'
30
24
  - - ">="
31
25
  - !ruby/object:Gem::Version
32
- version: 4.2.5
26
+ version: '6.0'
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: bundler
35
29
  requirement: !ruby/object:Gem::Requirement
36
30
  requirements:
37
- - - "~>"
31
+ - - ">="
38
32
  - !ruby/object:Gem::Version
39
- version: '1.10'
33
+ version: '0'
40
34
  type: :development
41
35
  prerelease: false
42
36
  version_requirements: !ruby/object:Gem::Requirement
43
37
  requirements:
44
- - - "~>"
38
+ - - ">="
45
39
  - !ruby/object:Gem::Version
46
- version: '1.10'
40
+ version: '0'
47
41
  - !ruby/object:Gem::Dependency
48
42
  name: rake
49
43
  requirement: !ruby/object:Gem::Requirement
50
44
  requirements:
51
45
  - - "~>"
52
46
  - !ruby/object:Gem::Version
53
- version: '10.0'
47
+ version: '13.0'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
51
  requirements:
58
52
  - - "~>"
59
53
  - !ruby/object:Gem::Version
60
- version: '10.0'
54
+ version: '13.0'
61
55
  - !ruby/object:Gem::Dependency
62
56
  name: rspec
63
57
  requirement: !ruby/object:Gem::Requirement
64
58
  requirements:
65
59
  - - "~>"
66
60
  - !ruby/object:Gem::Version
67
- version: '3.4'
61
+ version: '3.9'
68
62
  type: :development
69
63
  prerelease: false
70
64
  version_requirements: !ruby/object:Gem::Requirement
71
65
  requirements:
72
66
  - - "~>"
73
67
  - !ruby/object:Gem::Version
74
- version: '3.4'
68
+ version: '3.9'
75
69
  description: Read PC-Axis files using Ruby
76
70
  email:
77
- - ferblape@gmail.com
71
+ - fernando@blat.es
78
72
  executables: []
79
73
  extensions: []
80
74
  extra_rdoc_files: []
81
75
  files:
82
76
  - ".gitignore"
83
77
  - ".rspec"
84
- - ".ruby-version"
85
78
  - ".travis.yml"
79
+ - CHANGELOG.md
86
80
  - CODE_OF_CONDUCT.md
87
81
  - Gemfile
88
82
  - LICENSE.txt
@@ -92,6 +86,7 @@ files:
92
86
  - bin/setup
93
87
  - lib/ruby_px.rb
94
88
  - lib/ruby_px/dataset.rb
89
+ - lib/ruby_px/dataset/data.rb
95
90
  - ruby_px.gemspec
96
91
  homepage: https://github.com/PopulateTools/ruby_px
97
92
  licenses:
@@ -113,10 +108,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
108
  - !ruby/object:Gem::Version
114
109
  version: '0'
115
110
  requirements: []
116
- rubyforge_project:
117
- rubygems_version: 2.4.5.1
111
+ rubygems_version: 3.1.2
118
112
  signing_key:
119
113
  specification_version: 4
120
114
  summary: Read PC-Axis files using Ruby
121
115
  test_files: []
122
- has_rdoc:
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.2.3