zipcode-fr 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 49ba8c551db2a645e35f59f485073ea85ec78c43
4
- data.tar.gz: b4079b54250461c4b1d6a2c9b1a6b2724d7ace87
3
+ metadata.gz: 2b951d8164fd96461f3b1c4549c231867c77d4bc
4
+ data.tar.gz: 27f411e06e2fefa64d9e84098c0d84cc0e23bd2e
5
5
  SHA512:
6
- metadata.gz: e39eef729b7634e97b4441af2abe6e70314bc078603fe295ecab2380672dee6c8f06dfb125c7dee72d8305d7ae8c43aa0cef8b61c32570c5f0c48a26d0d0a3f5
7
- data.tar.gz: 616c5eb703167666cbae11c086bf3534fbb4ea258ee9352852588a9aab658a0a7eeb2256d200427e336f307ae99dd162cff9ed2284110d0e8426ab8ea5dd8d57
6
+ metadata.gz: fe56c6094a32e80322b04d4b0aaf6dc1dc91679ed14c49f9c6393d79290185f83dfa4cffdafa088d73843716b3dcf15a282f939e1fbff0c45486105798567532
7
+ data.tar.gz: 6a6705c55e4846bfadc5ba5f3e269e1e1917fbbcd23bfe771cc393a80f01e9696d42066dfb5f9955364c3c7433d127c117d4c0dd007d8cee74c98eff1481ff5d
data/lib/zipcode-fr.rb CHANGED
@@ -1,4 +1,7 @@
1
1
  module ZipCode
2
+ # TODO: factor index system out
3
+ # TODO: factor country-independent code out
4
+ # rubocop:disable Metrics/ModuleLength
2
5
  module FR
3
6
  require 'csv'
4
7
 
@@ -8,8 +11,8 @@ module ZipCode
8
11
 
9
12
  def load
10
13
  # TODO: non-optimal, but not overly long either
11
- index!(:name, reader)
12
- index!(:zip, reader)
14
+ index!(:name, reader, [:word_prefix, :match])
15
+ index!(:zip, reader, :prefix)
13
16
  @loaded = true
14
17
  end
15
18
 
@@ -57,20 +60,77 @@ module ZipCode
57
60
  [:insee, :name, :zip, :alt_name].zip(row).to_h
58
61
  end
59
62
 
60
- def index!(name, data, key = nil)
63
# Build a frozen lookup index over +data+ and register it as @indexes[name].
#
# name  - key under which the index is stored in @indexes.
# data  - object whose #each supplies a position and a record per item,
#         either as two yielded values or as one [pos, record] pair.
# modes - a single indexing mode or an Enumerable of modes; a lone value
#         (including nil) is wrapped into a one-element list.
# key   - record field to index; defaults to +name+.
#
# Returns the frozen index Hash.
def index!(name, data, modes = nil, key: nil)
  key ||= name
  # Once the index is frozen the default block must not write any more, so a
  # lookup of an unknown key then returns nil instead of raising.
  index = Hash.new { |h, k| h[k] = [] unless h.frozen? }

  # Array(modes) is deliberately not used: Array(nil) would drop the nil
  # mode, which the appender treats as a valid (fallback) mode.
  modes = [modes] unless modes.is_a?(Enumerable)
  modes.each do |mode|
    add = appender(index, key, mode)
    # A plain block destructures both `yield pos, record` and
    # `yield [pos, record]`; handing the arity-strict lambda straight to
    # #each would raise ArgumentError for the latter on Ruby >= 3.0.
    data.each { |pos, record| add.call(pos, record) }
  end

  # Several modes (or repeated terms) can record the same position twice.
  index.each_value(&:uniq!)
  index.freeze

  @indexes[name] = index
end
73
77
 
78
+ # TODO: create an appender registry
79
+ # rubocop:disable Metrics/AbcSize
80
+ # rubocop:disable Metrics/MethodLength
81
# Build the callback that feeds one record into +idx+ with the strategy
# selected by +mode+.
#
# idx  - index being filled.
# key  - record field whose value is indexed (read as record[key]).
# mode - :prefix, :infix, :word or :word_prefix; any other value (including
#        nil) falls back to indexing the whole value as one term.
#
# Returns a two-argument lambda taking (pos, record).
private def appender(idx, key, mode)
  handler =
    case mode
    when :prefix then :append_prefixes
    when :infix then :append_infixes
    when :word then :append_words
    when :word_prefix then :append_word_prefixes
    else :append_match
    end

  # Every strategy takes (idx, pos, value), so one lambda can dispatch to
  # whichever handler was selected above.
  ->(pos, record) { send(handler, idx, pos, record[key]) }
end
95
+
96
# Record +pos+ under the whole value +val+ (exact-match indexing).
# The bucket key is val.hash, not val itself.
private def append_match(idx, pos, val)
  idx[val.hash] << pos
end
99
+
100
# Record +pos+ under every whitespace-separated word of +val+.
# Bucket keys are the words' #hash values.
private def append_words(idx, pos, val)
  each_word(val) { |word| idx[word.hash] << pos }
end
103
+
104
# Record +pos+ under every prefix of every whitespace-separated word of
# +val+. Bucket keys are the prefixes' #hash values; a position may be
# appended to the same bucket more than once when words share a prefix.
private def append_word_prefixes(idx, pos, val)
  each_word(val) do |word|
    each_prefix(word) { |prefix| idx[prefix.hash] << pos }
  end
end
109
+
110
# Record +pos+ under every prefix of +val+ that is at least +min_size+
# characters long. Bucket keys are the prefixes' #hash values.
private def append_prefixes(idx, pos, val, min_size: 1)
  each_prefix(val, min_size: min_size) { |prefix| idx[prefix.hash] << pos }
end
113
+
114
# Yield each whitespace-separated word of +val+ to the given block.
private def each_word(val, &block)
  val.split.each(&block)
end
117
+
118
# Yield every prefix of +val+ whose length is between +min_size+ and
# val.length, shortest first. Returns an Enumerator when no block is given.
private def each_prefix(val, min_size: 1)
  return enum_for(__method__, val, min_size: min_size) unless block_given?

  min_size.upto(val.length) { |len| yield val[0...len] }
end
121
+
122
# Yield every suffix of +val+ whose length is between +min_size+ and
# val.length, shortest first. Returns an Enumerator when no block is given.
private def each_suffix(val, min_size: 1)
  return enum_for(__method__, val, min_size: min_size) unless block_given?

  # Slice by start offset and length rather than val[-len..-1]: for len == 0
  # the negative-index form becomes val[0..-1] and would yield the whole
  # string instead of the empty suffix.
  min_size.upto(val.length) { |len| yield val[val.length - len, len] }
end
125
+
126
# Record +pos+ under every substring of +val+ that is at least +min_size+
# characters long: each suffix of each prefix is one substring. Bucket keys
# are the substrings' #hash values.
private def append_infixes(idx, pos, val, min_size: 1)
  each_prefix(val, min_size: min_size) do |prefix|
    each_suffix(prefix, min_size: min_size) do |infix|
      idx[infix.hash] << pos
    end
  end
end
133
+
74
134
  private def index(name)
75
135
  if @indexes.key?(name)
76
136
  @indexes[name]
@@ -79,6 +139,12 @@ module ZipCode
79
139
  end
80
140
  end
81
141
 
142
# Memory footprint of the index registered under +name+, as reported by
# ObjectSpace.memsize_of: the index Hash itself plus each of its value
# objects. Keys and the elements inside the values are not counted.
def memsize_of_index(name)
  # Loaded lazily: objspace is only needed for this diagnostic helper.
  require 'objspace'

  index = @indexes[name]
  index.each_value.reduce(ObjectSpace.memsize_of(index)) do |total, bucket|
    total + ObjectSpace.memsize_of(bucket)
  end
end
147
+
82
148
  private def read_at(*positions, count: 1)
83
149
  Enumerator.new do |y|
84
150
  open do |io|