ruby_px 0.2.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 44941ebae6202b2580d1303054c643bd3e713971
4
- data.tar.gz: 4d9b8e3bc4dc2ae1156366473137d68618f31b05
2
+ SHA256:
3
+ metadata.gz: 9abe10d6ad1aed7bb9d28535671457cad398d7c17cab9c9bc49376dd46be5a3a
4
+ data.tar.gz: 95e9b594074e45eaa34e319c2ef58fc73edf4e5f6ceee9ae7c67bb2919407425
5
5
  SHA512:
6
- metadata.gz: 915d7fa2b750f11a241d25b2188210e4cbcceb2041a6c501fe78df65f480e21bce23bf172be39d1972d910ad05fea61277706fe543124a2e2bd9d5371a5aebcf
7
- data.tar.gz: 37b7bdc3faa735d97cbd04cc341e8811a49f3340b69a8e25ed21cd150e7a0520ced342c5d1fc7b7f81da786f13c941fa48c43b122f4d91cb3ff070b2451c0361
6
+ metadata.gz: ff283ea31198ae6fd560b591ac6c1d7dc649a148a61a3a4570953c380c22f4c667fe15a569454df1649d744845b62734051d89863e7e8c523e2f270ea77b3762
7
+ data.tar.gz: a36341f8e244874a2d588b0e28319c43da1f9f8b2488007ea79f1cccf16be8db544bf59839d6c1dc8f495e0581e2f3341922c41df9ef6a3d907f57748a0e31f2
data/.gitignore CHANGED
@@ -8,3 +8,4 @@
8
8
  /spec/reports/
9
9
  /tmp/
10
10
  .byebug_history
11
+ .ruby-version
data/.travis.yml CHANGED
@@ -1,4 +1,5 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.2.3
4
- before_install: gem install bundler -v 1.10.6
3
+ - 2.7.1
4
+ - 2.6.3
5
+ before_install: gem install bundler
data/CHANGELOG.md ADDED
@@ -0,0 +1,14 @@
1
+ # CHANGELOG
2
+
3
+ ## 0.7.0 (2021-03-04)
4
+
5
+ - Improved performance on large datasets [#9](https://github.com/PopulateTools/ruby_px/pull/9)
6
+
7
+ ## 0.6.0 (2020-04-28)
8
+
9
+ - Nice badge with the current version
10
+ - Added Changelog!
11
+ - Updated dependencies
12
+ - Fix multilingual VALUES and equals signs in values [#7](https://github.com/PopulateTools/ruby_px/pull/7)
13
+ - Use Rubocop to check syntax
14
+
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in ruby_px.gemspec
data/README.md CHANGED
@@ -1,5 +1,6 @@
1
1
  # RubyPx
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/ruby_px.svg)](https://badge.fury.io/rb/ruby_px)
3
4
  [![Build Status](https://travis-ci.org/PopulateTools/ruby_px.svg?branch=master)](https://travis-ci.org/PopulateTools/ruby_px)
4
5
 
5
6
  Work with PC-Axis files using Ruby.
@@ -82,7 +83,6 @@ dataset.data('edad (año a año)' => 'Total', 'sexo' => 'Ambos sexos')
82
83
 
83
84
  ## TODO
84
85
 
85
- - Allow to receive an URL as an argument
86
86
  - Refactor
87
87
  - Test the gem with more files
88
88
  - Speed-up the parsing time
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
3
5
 
4
6
  RSpec::Core::RakeTask.new(:spec)
5
7
 
6
- task :default => :spec
8
+ task default: :spec
data/bin/console CHANGED
@@ -1,7 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
- require "bundler/setup"
4
- require "pc-axis/dataset"
4
+ require 'bundler/setup'
5
+ require_relative '../lib/ruby_px'
5
6
 
6
7
  # You can add fixtures and/or initialization code here to make experimenting
7
8
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +11,5 @@ require "pc-axis/dataset"
10
11
  # require "pry"
11
12
  # Pry.start
12
13
 
13
- require "irb"
14
+ require 'irb'
14
15
  IRB.start
data/lib/ruby_px.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'active_support/all'
2
4
 
3
5
  module RubyPx
@@ -1,23 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'open-uri'
4
+
1
5
  module RubyPx
2
6
  class Dataset
7
+ require 'ruby_px/dataset/data'
8
+
3
9
  attr_reader :headings, :stubs
4
10
 
5
- METADATA_RECORDS = ['TITLE','UNITS','SOURCE','CONTACT','LAST-UPDATED','CREATION-DATE']
11
+ METADATA_RECORDS = %w[TITLE UNITS SOURCE CONTACT LAST-UPDATED CREATION-DATE].freeze
6
12
  HEADING_RECORD = 'HEADING'
7
13
  STUB_RECORD = 'STUB'
8
14
 
9
- def initialize(file)
10
- if !File.readable?(file)
11
- raise "File #{file} not readable"
12
- end
13
-
15
+ def initialize(resource_uri)
14
16
  @metadata = {}
15
17
  @headings = []
16
18
  @stubs = []
17
19
  @values = {}
18
- @data = []
20
+ @data = Data.new
19
21
 
20
- parse_file(file)
22
+ parse_resource(resource_uri)
21
23
  end
22
24
 
23
25
  def title
@@ -54,8 +56,10 @@ module RubyPx
54
56
 
55
57
  def data(options)
56
58
  # Validate parameters
57
- options.each do |k,v|
58
- raise "Invalid value #{v} for dimension #{k}" unless dimension(k).include?(v)
59
+ options.each do |k, v|
60
+ unless dimension(k).include?(v)
61
+ raise "Invalid value #{v} for dimension #{k}"
62
+ end
59
63
  end
60
64
 
61
65
  # Return a single value
@@ -67,20 +71,20 @@ module RubyPx
67
71
 
68
72
  # positions are i, j, k
69
73
  positions = (stubs + headings).map do |dimension_name|
70
- self.dimension(dimension_name).index(options[dimension_name])
74
+ dimension(dimension_name).index(options[dimension_name])
71
75
  end
72
76
 
73
77
  # dimension_sizes are from all dimensions except the first one
74
78
  dimension_sizes = (stubs + headings)[1..-1].map do |dimension_name|
75
- self.dimension(dimension_name).length
79
+ dimension(dimension_name).length
76
80
  end
77
81
 
78
82
  positions.each_with_index do |p, i|
79
83
  d = dimension_sizes[i..-1].reduce(&:*)
80
- offset += (d ? p*d : p)
84
+ offset += (d ? p * d : p)
81
85
  end
82
86
 
83
- return @data[offset]
87
+ @data.at(offset)
84
88
 
85
89
  # Return an array of options
86
90
  elsif options.length == dimensions.length - 1
@@ -91,30 +95,31 @@ module RubyPx
91
95
  result << data(options.merge(missing_dimension => dimension_value))
92
96
  end
93
97
 
94
- return result
98
+ result
95
99
  else
96
- raise "Not implented yet, sorry"
100
+ raise 'Not implented yet, sorry'
97
101
  end
98
102
  end
99
103
 
100
104
  def inspect
101
- "#<#{self.class.name}:#{self.object_id}>"
105
+ "#<#{self.class.name}:#{object_id}>"
102
106
  end
103
107
 
104
108
  private
105
109
 
106
- def parse_file(file)
107
- File.foreach(file) do |line|
110
+ def parse_resource(resource_uri)
111
+ open(resource_uri).each_line do |line|
108
112
  parse_line(line.chomp)
109
113
  end
110
- return true
114
+
115
+ true
111
116
  end
112
117
 
113
118
  def parse_line(line)
114
- @line = line
119
+ @line = line.force_encoding('utf-8').encode('utf-8')
115
120
 
116
121
  if @current_record.nil?
117
- key, value = line.scan(/[^\=]+/)
122
+ key, value = line.split('=', 2)
118
123
  set_current_record(key)
119
124
  else
120
125
  value = line
@@ -123,27 +128,25 @@ module RubyPx
123
128
  return if @current_record.nil? || value.nil?
124
129
 
125
130
  if @type == :data
126
- value = value.split(' ')
131
+ value = value.split(/[\ ;,\t]/).delete_if(&:blank?).each(&:strip)
127
132
 
128
- add_value_to_bucket(bucket,value) unless value == [';']
133
+ add_value_to_bucket(bucket, value) unless value == [';']
129
134
  else
130
135
  # First format: "\"20141201\";"
131
136
  if value =~ /\A\"([^"]+)\";\z/
132
137
  value = value.match(/\A\"([^"]+)\";\z/)[1]
133
138
  add_value_to_bucket(bucket, value.strip)
134
139
 
135
- # Second format: "Ambos sexos","Hombres","Mujeres";
140
+ # Second format: "Ambos sexos","Hombres","Mujeres";
136
141
  elsif value =~ /\"([^"]+)\",?/
137
- value = value.split(/\"([^"]+)\",?;?/).delete_if{ |s| s.blank? }.each(&:strip)
142
+ value = value.split(/\"([^"]+)\",?;?/).delete_if(&:blank?).each(&:strip)
138
143
  add_value_to_bucket(bucket, value)
139
144
  end
140
145
  end
141
146
 
142
147
  # If we see a ; at the end of the line, close out the record so we
143
148
  # expect a new record.
144
- if line[-1..-1] == ";"
145
- @current_record = nil
146
- end
149
+ @current_record = nil if line[-1..-1] == ';'
147
150
  end
148
151
 
149
152
  def set_current_record(key)
@@ -156,7 +159,7 @@ module RubyPx
156
159
  elsif key == STUB_RECORD
157
160
  @type = :stubs
158
161
  key
159
- elsif key =~ /\AVALUES/
162
+ elsif key =~ /\AVALUES/ && key !~ /\[\w\w\]/
160
163
  @type = :values
161
164
  key.match(/\"([^"]+)\"/)[1]
162
165
  elsif key =~ /\ADATA/
@@ -175,22 +178,20 @@ module RubyPx
175
178
  elsif @type == :headings || @type == :stubs
176
179
  bucket << value
177
180
  bucket.flatten!
178
- else
179
- if bucket.is_a?(Hash)
180
- if value.is_a?(Array)
181
- value = value.map(&:strip)
182
- elsif value.is_a?(String)
183
- value.strip!
184
- end
185
- if bucket[@current_record].nil?
186
- bucket[@current_record] = value
187
- else
188
- bucket[@current_record].concat([value])
189
- bucket[@current_record].flatten!
190
- end
181
+ elsif bucket.is_a?(Hash)
182
+ if value.is_a?(Array)
183
+ value = value.map(&:strip)
184
+ elsif value.is_a?(String)
185
+ value.strip!
186
+ end
187
+ if bucket[@current_record].nil?
188
+ value = Array.wrap(value) if @type == :values
189
+ bucket[@current_record] = value
190
+ else
191
+ bucket[@current_record].concat([value])
192
+ bucket[@current_record].flatten!
191
193
  end
192
194
  end
193
195
  end
194
-
195
196
  end
196
197
  end
@@ -0,0 +1,48 @@
1
+ module RubyPx
2
+ class Dataset
3
+ class Data
4
+
5
+ CHUNK_SIZE = 5_000
6
+ attr_accessor :current_chunk_index
7
+
8
+ def initialize
9
+ @current_chunk_index = 0
10
+ end
11
+
12
+ def at index
13
+ chunk_index = index/CHUNK_SIZE
14
+ index_inside_chunk = index%CHUNK_SIZE
15
+
16
+ get_chunk(chunk_index)[index_inside_chunk]
17
+ end
18
+
19
+ def concat array
20
+ current_chunk.concat(array)
21
+ if current_chunk.size > CHUNK_SIZE
22
+ excess = current_chunk.pop(current_chunk.size-CHUNK_SIZE)
23
+ self.current_chunk_index += 1
24
+ concat(excess)
25
+ end
26
+ end
27
+
28
+ def indexes_count
29
+ self.current_chunk_index+1
30
+ end
31
+
32
+ private
33
+
34
+
35
+ def current_chunk
36
+ current = instance_variable_get("@chunk_#{self.current_chunk_index}")
37
+ return current if current
38
+
39
+ instance_variable_set("@chunk_#{self.current_chunk_index}", [])
40
+ end
41
+
42
+ def get_chunk chunk_index
43
+ instance_variable_get("@chunk_#{chunk_index}")
44
+ end
45
+
46
+ end
47
+ end
48
+ end
data/ruby_px.gemspec CHANGED
@@ -1,34 +1,35 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
3
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
5
 
5
6
  Gem::Specification.new do |spec|
6
- spec.name = "ruby_px"
7
- spec.version = '0.2.0'
8
- spec.authors = ["Fernando Blat"]
9
- spec.email = ["ferblape@gmail.com"]
7
+ spec.name = 'ruby_px'
8
+ spec.version = '0.7.0'
9
+ spec.authors = ['Fernando Blat']
10
+ spec.email = ['fernando@blat.es']
10
11
 
11
- spec.summary = %q{Read PC-Axis files using Ruby}
12
- spec.description = %q{Read PC-Axis files using Ruby}
13
- spec.homepage = "https://github.com/PopulateTools/ruby_px"
14
- spec.license = "MIT"
12
+ spec.summary = 'Read PC-Axis files using Ruby'
13
+ spec.description = 'Read PC-Axis files using Ruby'
14
+ spec.homepage = 'https://github.com/PopulateTools/ruby_px'
15
+ spec.license = 'MIT'
15
16
 
16
17
  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
17
18
  # delete this section to allow pushing this gem to any host.
18
19
  if spec.respond_to?(:metadata)
19
- spec.metadata['allowed_push_host'] = "https://rubygems.org"
20
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
20
21
  else
21
- raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
22
+ raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
22
23
  end
23
24
 
24
25
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
- spec.bindir = "exe"
26
+ spec.bindir = 'exe'
26
27
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
- spec.require_paths = ["lib"]
28
+ spec.require_paths = ['lib']
28
29
 
29
- spec.add_runtime_dependency "activesupport", "~> 4.2", ">= 4.2.5"
30
+ spec.add_runtime_dependency 'activesupport', '>= 6.0'
30
31
 
31
- spec.add_development_dependency "bundler", "~> 1.10"
32
- spec.add_development_dependency "rake", "~> 10.0"
33
- spec.add_development_dependency "rspec", "~> 3.4"
32
+ spec.add_development_dependency 'bundler'
33
+ spec.add_development_dependency 'rake', '~> 13.0'
34
+ spec.add_development_dependency 'rspec', '~> 3.9'
34
35
  end
metadata CHANGED
@@ -1,88 +1,82 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_px
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fernando Blat
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-13 00:00:00.000000000 Z
11
+ date: 2021-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '4.2'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
- version: 4.2.5
19
+ version: '6.0'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '4.2'
30
24
  - - ">="
31
25
  - !ruby/object:Gem::Version
32
- version: 4.2.5
26
+ version: '6.0'
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: bundler
35
29
  requirement: !ruby/object:Gem::Requirement
36
30
  requirements:
37
- - - "~>"
31
+ - - ">="
38
32
  - !ruby/object:Gem::Version
39
- version: '1.10'
33
+ version: '0'
40
34
  type: :development
41
35
  prerelease: false
42
36
  version_requirements: !ruby/object:Gem::Requirement
43
37
  requirements:
44
- - - "~>"
38
+ - - ">="
45
39
  - !ruby/object:Gem::Version
46
- version: '1.10'
40
+ version: '0'
47
41
  - !ruby/object:Gem::Dependency
48
42
  name: rake
49
43
  requirement: !ruby/object:Gem::Requirement
50
44
  requirements:
51
45
  - - "~>"
52
46
  - !ruby/object:Gem::Version
53
- version: '10.0'
47
+ version: '13.0'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
51
  requirements:
58
52
  - - "~>"
59
53
  - !ruby/object:Gem::Version
60
- version: '10.0'
54
+ version: '13.0'
61
55
  - !ruby/object:Gem::Dependency
62
56
  name: rspec
63
57
  requirement: !ruby/object:Gem::Requirement
64
58
  requirements:
65
59
  - - "~>"
66
60
  - !ruby/object:Gem::Version
67
- version: '3.4'
61
+ version: '3.9'
68
62
  type: :development
69
63
  prerelease: false
70
64
  version_requirements: !ruby/object:Gem::Requirement
71
65
  requirements:
72
66
  - - "~>"
73
67
  - !ruby/object:Gem::Version
74
- version: '3.4'
68
+ version: '3.9'
75
69
  description: Read PC-Axis files using Ruby
76
70
  email:
77
- - ferblape@gmail.com
71
+ - fernando@blat.es
78
72
  executables: []
79
73
  extensions: []
80
74
  extra_rdoc_files: []
81
75
  files:
82
76
  - ".gitignore"
83
77
  - ".rspec"
84
- - ".ruby-version"
85
78
  - ".travis.yml"
79
+ - CHANGELOG.md
86
80
  - CODE_OF_CONDUCT.md
87
81
  - Gemfile
88
82
  - LICENSE.txt
@@ -92,6 +86,7 @@ files:
92
86
  - bin/setup
93
87
  - lib/ruby_px.rb
94
88
  - lib/ruby_px/dataset.rb
89
+ - lib/ruby_px/dataset/data.rb
95
90
  - ruby_px.gemspec
96
91
  homepage: https://github.com/PopulateTools/ruby_px
97
92
  licenses:
@@ -113,10 +108,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
108
  - !ruby/object:Gem::Version
114
109
  version: '0'
115
110
  requirements: []
116
- rubyforge_project:
117
- rubygems_version: 2.4.5.1
111
+ rubygems_version: 3.1.2
118
112
  signing_key:
119
113
  specification_version: 4
120
114
  summary: Read PC-Axis files using Ruby
121
115
  test_files: []
122
- has_rdoc:
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- 2.2.3