zipcode-fr 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 49ba8c551db2a645e35f59f485073ea85ec78c43
4
- data.tar.gz: b4079b54250461c4b1d6a2c9b1a6b2724d7ace87
3
+ metadata.gz: 2b951d8164fd96461f3b1c4549c231867c77d4bc
4
+ data.tar.gz: 27f411e06e2fefa64d9e84098c0d84cc0e23bd2e
5
5
  SHA512:
6
- metadata.gz: e39eef729b7634e97b4441af2abe6e70314bc078603fe295ecab2380672dee6c8f06dfb125c7dee72d8305d7ae8c43aa0cef8b61c32570c5f0c48a26d0d0a3f5
7
- data.tar.gz: 616c5eb703167666cbae11c086bf3534fbb4ea258ee9352852588a9aab658a0a7eeb2256d200427e336f307ae99dd162cff9ed2284110d0e8426ab8ea5dd8d57
6
+ metadata.gz: fe56c6094a32e80322b04d4b0aaf6dc1dc91679ed14c49f9c6393d79290185f83dfa4cffdafa088d73843716b3dcf15a282f939e1fbff0c45486105798567532
7
+ data.tar.gz: 6a6705c55e4846bfadc5ba5f3e269e1e1917fbbcd23bfe771cc393a80f01e9696d42066dfb5f9955364c3c7433d127c117d4c0dd007d8cee74c98eff1481ff5d
data/lib/zipcode-fr.rb CHANGED
@@ -1,4 +1,7 @@
1
1
  module ZipCode
2
+ # TODO: factor index system out
3
+ # TODO: factor country-independent code out
4
+ # rubocop:disable Metrics/ModuleLength
2
5
  module FR
3
6
  require 'csv'
4
7
 
@@ -8,8 +11,8 @@ module ZipCode
8
11
 
9
12
  def load
10
13
  # TODO: non-optimal, but not overly long either
11
- index!(:name, reader)
12
- index!(:zip, reader)
14
+ index!(:name, reader, [:word_prefix, :match])
15
+ index!(:zip, reader, :prefix)
13
16
  @loaded = true
14
17
  end
15
18
 
@@ -57,20 +60,77 @@ module ZipCode
57
60
  [:insee, :name, :zip, :alt_name].zip(row).to_h
58
61
  end
59
62
 
60
- def index!(name, data, key = nil)
63
+ def index!(name, data, modes = nil, key: nil)
61
64
  key ||= name
62
65
  index = Hash.new { |h, k| h[k] = [] unless h.frozen? }
63
66
 
64
- data.each do |pos, record|
65
- val = record[key]
66
- val.length.times { |i| index[val[0..i].hash] << pos }
67
+ modes = [modes] unless modes.is_a?(Enumerable)
68
+ modes.each do |mode|
69
+ data.each(&appender(index, key, mode))
67
70
  end
68
71
 
72
+ index.each { |_, v| v.uniq! }
69
73
  index.freeze
70
74
 
71
75
  @indexes[name] = index
72
76
  end
73
77
 
78
+ # TODO: create an appender registry
79
+ # rubocop:disable Metrics/AbcSize
80
+ # rubocop:disable Metrics/MethodLength
81
+ private def appender(idx, key, mode)
82
+ case mode
83
+ when :prefix
84
+ -> (pos, record) { append_prefixes(idx, pos, record[key]) }
85
+ when :infix
86
+ -> (pos, record) { append_infixes(idx, pos, record[key]) }
87
+ when :word
88
+ -> (pos, record) { append_words(idx, pos, record[key]) }
89
+ when :word_prefix
90
+ -> (pos, record) { append_word_prefixes(idx, pos, record[key]) }
91
+ else
92
+ -> (pos, record) { append_match(idx, pos, record[key]) }
93
+ end
94
+ end
95
+
96
+ private def append_match(idx, pos, val)
97
+ idx[val.hash] << pos
98
+ end
99
+
100
+ private def append_words(idx, pos, val)
101
+ each_word(val) { |w| idx[w.hash] << pos }
102
+ end
103
+
104
+ private def append_word_prefixes(idx, pos, val)
105
+ each_word(val) do |word|
106
+ each_prefix(word) { |prefix| idx[prefix.hash] << pos }
107
+ end
108
+ end
109
+
110
+ private def append_prefixes(idx, pos, val, min_size: 1)
111
+ each_prefix(val, min_size: min_size) { |prefix| idx[prefix.hash] << pos }
112
+ end
113
+
114
+ private def each_word(val, &block)
115
+ val.split.each(&block)
116
+ end
117
+
118
+ private def each_prefix(val, min_size: 1)
119
+ min_size.upto(val.length) { |i| yield val[0...i] }
120
+ end
121
+
122
+ private def each_suffix(val, min_size: 1)
123
+ min_size.upto(val.length) { |i| yield val[-i..-1] }
124
+ end
125
+
126
+ private def append_infixes(idx, pos, val, min_size: 1)
127
+ each_prefix(val, min_size: min_size) do |prefix|
128
+ each_suffix(prefix, min_size: min_size) do |infix|
129
+ idx[infix.hash] << pos
130
+ end
131
+ end
132
+ end
133
+
74
134
  private def index(name)
75
135
  if @indexes.key?(name)
76
136
  @indexes[name]
@@ -79,6 +139,12 @@ module ZipCode
79
139
  end
80
140
  end
81
141
 
142
+ def memsize_of_index(name)
143
+ require 'objspace'
144
+ ObjectSpace.memsize_of(@indexes[name]) +
145
+ @indexes[name].reduce(0) { |a, (_, v)| a + ObjectSpace.memsize_of(v) }
146
+ end
147
+
82
148
  private def read_at(*positions, count: 1)
83
149
  Enumerator.new do |y|
84
150
  open do |io|