fias 0.0.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -22
- data/.rubocop.yml +7 -0
- data/.travis.yml +10 -0
- data/Gemfile +1 -1
- data/LICENSE.txt +2 -2
- data/README.md +259 -155
- data/Rakefile +6 -1
- data/config/names.txt +0 -0
- data/config/synonyms.yml +50 -0
- data/examples/create.rb +106 -0
- data/examples/generate_index.rb +63 -0
- data/fias.gemspec +33 -21
- data/lib/fias.rb +197 -10
- data/lib/fias/config.rb +74 -0
- data/lib/fias/import/copy.rb +62 -0
- data/lib/fias/import/dbf.rb +81 -0
- data/lib/fias/import/download_service.rb +37 -0
- data/lib/fias/import/restore_parent_id.rb +51 -0
- data/lib/fias/import/tables.rb +74 -0
- data/lib/fias/name/append.rb +30 -0
- data/lib/fias/name/canonical.rb +42 -0
- data/lib/fias/name/extract.rb +85 -0
- data/lib/fias/name/house_number.rb +71 -0
- data/lib/fias/name/split.rb +60 -0
- data/lib/fias/name/synonyms.rb +93 -0
- data/lib/fias/query.rb +43 -0
- data/lib/fias/query/estimate.rb +67 -0
- data/lib/fias/query/finder.rb +75 -0
- data/lib/fias/query/params.rb +101 -0
- data/lib/fias/railtie.rb +3 -17
- data/lib/fias/version.rb +1 -1
- data/spec/fixtures/ACTSTAT.DBF +0 -0
- data/spec/fixtures/NORDOC99.DBF +0 -0
- data/spec/fixtures/STRSTAT.DBF +0 -0
- data/spec/fixtures/addressing.yml +93 -0
- data/spec/fixtures/query.yml +79 -0
- data/spec/fixtures/query_sanitization.yml +75 -0
- data/spec/fixtures/status_append.yml +60 -0
- data/spec/lib/import/copy_spec.rb +44 -0
- data/spec/lib/import/dbf_spec.rb +28 -0
- data/spec/lib/import/download_service_spec.rb +15 -0
- data/spec/lib/import/restore_parent_id_spec.rb +34 -0
- data/spec/lib/import/tables_spec.rb +26 -0
- data/spec/lib/name/append_spec.rb +14 -0
- data/spec/lib/name/canonical_spec.rb +20 -0
- data/spec/lib/name/extract_spec.rb +67 -0
- data/spec/lib/name/house_number_spec.rb +45 -0
- data/spec/lib/name/query_spec.rb +21 -0
- data/spec/lib/name/split_spec.rb +15 -0
- data/spec/lib/name/synonyms_spec.rb +51 -0
- data/spec/lib/query/params_spec.rb +15 -0
- data/spec/lib/query_spec.rb +27 -0
- data/spec/spec_helper.rb +30 -0
- data/spec/support/db.rb +30 -0
- data/spec/support/query.rb +13 -0
- data/tasks/db.rake +52 -0
- data/tasks/download.rake +15 -0
- metadata +246 -64
- data/lib/fias/active_record/address_object.rb +0 -231
- data/lib/fias/active_record/address_object_type.rb +0 -15
- data/lib/fias/dbf_wrapper.rb +0 -90
- data/lib/fias/importer.rb +0 -30
- data/lib/fias/importer/base.rb +0 -59
- data/lib/fias/importer/pg.rb +0 -81
- data/lib/fias/importer/sqlite.rb +0 -38
- data/lib/generators/fias/migration.rb +0 -34
- data/lib/generators/fias/templates/create_fias_tables.rb +0 -5
- data/tasks/fias.rake +0 -68
data/lib/fias/config.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module Fias
  # In-memory registry of address-object designations built by the
  # configuration block handed to +new+.
  #
  # Lookup tables (all keys are lower-cased with +Unicode.downcase+, which is
  # provided outside this file):
  #
  # * +longs+          - short form => long form
  # * +shorts+         - long form => short form
  # * +aliases+        - long form => list of aliases
  # * +index+          - every recognised spelling => long form
  # * +exceptions+     - long or short form => [long, short]
  # * +proper_names+   - names registered via +add_proper_name+
  # * +synonyms+       - groups registered via +add_synonym+
  # * +synonyms_index+ - each synonym => its whole group
  class Config
    attr_reader :index, :longs, :shorts, :aliases, :exceptions
    attr_reader :proper_names, :synonyms, :synonyms_index

    # Creates the empty tables, lets the caller populate them through the
    # block and then orders the index (see #finalize_index).
    #
    # The block is optional: without one the config is simply left empty
    # instead of raising LocalJumpError.
    def initialize
      @index = {}
      @longs = {}
      @shorts = {}
      @aliases = {}
      @exceptions = {}
      @proper_names = []
      @synonyms = []
      @synonyms_index = {}

      yield(self) if block_given?

      finalize_index
    end

    # Registers a designation.
    #
    # long    - full form.
    # short   - abbreviated form.
    # aliases - optional list of additional spellings.
    def add_name(long, short, aliases = [])
      @longs[Unicode.downcase(short)] = long
      @shorts[Unicode.downcase(long)] = short
      @aliases[Unicode.downcase(long)] = aliases

      populate_index(long, short, aliases)
    end

    # Registers a [long, short] pair reachable by either of its forms.
    def add_exception(long, short)
      @exceptions[Unicode.downcase(short)] = [long, short]
      @exceptions[Unicode.downcase(long)] = [long, short]
    end

    # Appends +name+ to the list of proper names.
    def add_proper_name(name)
      @proper_names << name
    end

    # Registers a group of interchangeable names; every member of the group
    # maps to the whole group in +synonyms_index+.
    def add_synonym(*names)
      @synonyms << names
      populate_synonyms_index(names)
    end

    private

    # Adds every spelling of a designation to the index: all word orders of
    # the long form, the short form (with and without a trailing dot) and
    # the aliases.
    def populate_index(long, short, aliases)
      long_downcase = Unicode.downcase(long)
      short_downcase = Unicode.downcase(short)

      populate_long_permutations(long)

      if long_downcase != short_downcase
        @index[short_downcase] = long
        @index[short_downcase[0..-2]] = long if short_downcase[-1] == '.'
      end

      aliases.each { |al| @index[Unicode.downcase(al)] = long }
    end

    # Indexes every permutation of the words of the long form.
    def populate_long_permutations(long)
      Unicode.downcase(long).split(' ').permutation.each do |variant|
        @index[variant.join(' ')] = long
      end
    end

    # Rebuilds the index so that iteration visits the longest keys first.
    #
    # The hash is built from [key, value] pairs directly rather than from a
    # flattened, splatted argument list, so the entry count is not bounded
    # by the argument limit and values are never flattened into the keys.
    def finalize_index
      @index = Hash[@index.sort_by { |key, _| key.size }.reverse]
    end

    def populate_synonyms_index(names)
      names.each { |name| @synonyms_index[name] = names }
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Fias
  module Import
    # Streams the records of one DBF table into a database table through a
    # binary COPY, using PgDataEncoder::EncodeForCopy to build the payload.
    class Copy
      # Chunk size handed to IO#readpartial while feeding the COPY stream
      # (the original source carries the note "10_240" next to this value).
      BLOCK_SIZE = 65_536

      attr_reader :dbf, :table_name

      # db         - database handle; must respond to #[], #run and
      #              #copy_into as used below.
      # table_name - destination table, converted to a Symbol.
      # dbf        - DBF table; must respond to #columns and #each.
      # types      - column name => encoder type; names that are not
      #              columns of +dbf+ are ignored.
      def initialize(db, table_name, dbf, types = {})
        @db = db
        @table_name = table_name.to_sym
        @dbf = dbf
        @encoder = PgDataEncoder::EncodeForCopy.new(
          column_types: map_types(types)
        )
      end

      # Feeds every record into the encoder, turning empty strings into nil.
      # Yields (without arguments) once per record when a block is given, so
      # callers can report progress.
      #
      # The dbf gem yields nil for records flagged as deleted; those are not
      # encoded (nil.to_a would otherwise add a row with no fields), but the
      # block is still called for them so progress counters stay aligned
      # with the number of records iterated.
      def encode
        @dbf.each do |record|
          unless record.nil?
            @encoder.add(record.to_a.map { |value| value == '' ? nil : value })
          end
          yield if block_given?
        end
      end

      # Truncates the destination table and copies the encoded data into it.
      def copy
        prepare
        copy_into
      end

      private

      # Translates column name => type into column position => type, which
      # is what the encoder is given as :column_types.
      def map_types(types)
        types.each_with_object({}) do |(name, type), mapped|
          index = columns.index(name.to_sym)
          mapped[index] = type if index
        end
      end

      # Lower-cased DBF column names as Symbols (memoized).
      def columns
        @columns ||= @dbf.columns.map { |column| column.name.downcase.to_sym }
      end

      def prepare
        @db[@table_name].truncate
        @db.run('SET client_min_messages TO warning;')
      end

      # Hands the encoded stream to the database in BLOCK_SIZE chunks; the
      # block returns nil once the stream is exhausted.
      def copy_into
        io = @encoder.get_io

        @db.copy_into(@table_name, columns: columns, format: :binary) do
          begin
            io.readpartial(BLOCK_SIZE)
          rescue EOFError
            nil
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module Fias
  module Import
    # Opens the DBF files found in a directory and exposes them as a hash of
    # accessor name => DBF::Table.
    class Dbf
      # Builds the accessor => file name map for a numbered table family,
      # e.g. n_tables('house') => { house01: 'HOUSE01.DBF', ...,
      # house99: 'HOUSE99.DBF' }.
      def self.n_tables(title)
        pairs = (1..99).map do |n|
          [
            format('%s%0.2d', title, n).to_sym,
            format('%s%0.2d.DBF', title.upcase, n)
          ]
        end

        Hash[pairs]
      end

      HOUSE_TABLES = n_tables('house')
      NORDOC_TABLES = n_tables('nordoc')

      # NOTE(review): the key +estate_statues+ is spelled this way in the
      # source; it is kept unchanged because it is used as a lookup key.
      TABLES = {
        address_object_types: 'SOCRBASE.DBF',
        current_statuses: 'CURENTST.DBF',
        actual_statuses: 'ACTSTAT.DBF',
        operation_statuses: 'OPERSTAT.DBF',
        center_statuses: 'CENTERST.DBF',
        interval_statuses: 'INTVSTAT.DBF',
        estate_statues: 'ESTSTAT.DBF',
        structure_statuses: 'STRSTAT.DBF',
        address_objects: 'ADDROBJ.DBF',
        house_intervals: 'HOUSEINT.DBF',
        landmarks: 'LANDMARK.DBF'
      }.merge(
        HOUSE_TABLES
      ).merge(
        NORDOC_TABLES
      )

      DEFAULT_ENCODING = Encoding::CP866

      attr_reader :files

      # path     - directory containing the DBF files.
      # encoding - encoding passed to DBF::Table.new.
      #
      # Raises ArgumentError when +path+ is not an existing directory.
      def initialize(path, encoding = DEFAULT_ENCODING)
        @path = path
        @files = {}

        unless Dir.exist?(@path)
          fail ArgumentError, "FIAS database path #{@path} does not exists"
        end

        open_files(encoding)
      end

      # Returns the opened files restricted to +names+ (Strings or Symbols).
      # Without arguments all opened files are returned.
      #
      # Two group names are expanded: :houses to every HOUSE_TABLES accessor
      # and :nordocs to every NORDOC_TABLES accessor. Previously the :houses
      # expansion was computed and then discarded, because only the last
      # expression of the mapping block was returned.
      def only(*names)
        return @files if names.empty?

        names = names.flat_map do |name|
          case name.to_sym
          when :houses then HOUSE_TABLES.keys
          when :nordocs then NORDOC_TABLES.keys
          else name.to_sym
          end
        end

        @files.slice(*names)
      end

      private

      # Opens every file listed in TABLES that exists under @path.
      def open_files(encoding)
        TABLES.each do |accessor, dbf_filename|
          filename = File.join(@path, dbf_filename)

          next unless File.exist?(filename)

          dbf = DBF::Table.new(filename, nil, encoding)
          @files[accessor] = dbf if dbf
        end
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Fias
  module Import
    # Asks the FIAS SOAP download service for the location of the latest
    # complete DBF archive.
    module DownloadService
      # Posts the GetLastDownloadFileInfo request and returns the text found
      # inside the first <FiasCompleteDbfUrl> element of the response body.
      #
      # Returns nil when the body is missing or holds no such element. The
      # capture is non-greedy so that a body carrying the element more than
      # once on a single line yields only the first value.
      def url
        response = HTTParty.post(
          'http://fias.nalog.ru/WebServices/Public/DownloadService.asmx',
          OPTIONS
        )

        matches =
          response.body.to_s.match(%r{<FiasCompleteDbfUrl>(.*?)</FiasCompleteDbfUrl>})

        matches[1] if matches
      end

      # Request options passed to HTTParty.post: SOAP envelope and headers.
      #
      # NOTE(review): the Content-Type parameter is written "encoding=utf-8";
      # the standard media-type parameter is "charset". Left unchanged here,
      # confirm against the service before altering it.
      OPTIONS = {
        body: %(<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<GetLastDownloadFileInfo
xmlns="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/" />
</soap:Body>
</soap:Envelope>
),
        headers: {
          'SOAPAction' => 'http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/GetLastDownloadFileInfo',
          'Content-Type' => 'text/xml; encoding=utf-8'
        }
      }

      module_function :url
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Fias
  module Import
    # Fills the numeric parent reference of every row in a dataset from the
    # key / parent-key pair stored on the rows.
    class RestoreParentId
      # scope   - dataset; must respond to #select_map, #where and #update
      #           as used below.
      # options - column names:
      #           :key        - row key column (default :aoguid)
      #           :parent_key - column holding the parent's key
      #                         (default :parentguid)
      #           :id         - primary key column (default :id)
      #           :parent_id  - column to write the parent's id into
      #                         (default :parent_id)
      def initialize(scope, options = {})
        @scope = scope
        @key = options.fetch(:key, :aoguid)
        @parent_key = options.fetch(:parent_key, :parentguid)
        @id = options.fetch(:id, :id)
        @parent_id = options.fetch(:parent_id, :parent_id)
      end

      # Issues one UPDATE per distinct parent, setting the parent-id column
      # on all of its children. Rows without a resolvable parent are grouped
      # under nil.
      #
      # The configured :id and :parent_id column names are used here; they
      # were previously ignored in favour of the literal :id / :parent_id.
      def restore
        id_grouped_by_parent_id.each do |parent_id, ids|
          @scope.where(@id => ids).update(@parent_id => parent_id)
        end
      end

      private

      # Rows as [id, key, parent_key] triples (memoized).
      def records
        @records ||= @scope.select_map([@id, @key, @parent_key])
      end

      # Rows indexed by their key; a later row wins on duplicate keys.
      def records_by_key
        @records_by_key ||= records.each_with_object({}) do |row, indexed|
          indexed[row[1]] = row
        end
      end

      # [id, parent id] pairs; the parent id is nil when the row has no
      # parent key or the key matches no row.
      def id_parent_id_tuples
        records.map do |id, _key, parent_key|
          parent = records_by_key[parent_key] if parent_key
          [id, parent && parent[0]]
        end
      end

      # parent id => list of child ids.
      def id_grouped_by_parent_id
        id_parent_id_tuples.each_with_object({}) do |(id, parent_id), rows|
          (rows[parent_id] ||= []) << id
        end
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Fias
  module Import
    # Creates database tables mirroring a set of DBF files and builds the
    # Copy objects that load them.
    class Tables
      # Per-table list of column names that are created as :uuid columns
      # and encoded as uuid when copied.
      UUID = {
        address_objects: %w(aoguid aoid previd nextid parentguid)
      }

      DEFAULT_PREFIX = 'fias'

      attr_reader :files

      # db     - database handle; must respond to #create_table.
      # files  - hash of accessor name => DBF table.
      # prefix - table name prefix; a blank prefix yields unprefixed names.
      def initialize(db, files, prefix = DEFAULT_PREFIX)
        @db = db
        @files = files
        @prefix = prefix
      end

      # Creates one table per non-blank DBF file.
      def create
        @files.each do |name, dbf|
          next if dbf.blank?
          create_table(name, dbf)
        end
      end

      # Returns a Copy for every non-blank DBF file. Blank entries are
      # skipped exactly as #create skips them, so no Copy is ever built for
      # a table that was not created.
      def copy
        @files.reject { |_, dbf| dbf.blank? }.map do |name, dbf|
          Copy.new(@db, table_name(name), dbf, uuid_column_types(name))
        end
      end

      private

      # Prefixed table name as a Symbol, e.g. :fias_address_objects.
      def table_name(name)
        [@prefix, name].delete_if(&:blank?).join('_').to_sym
      end

      # Creates the table with an :id primary key followed by one column per
      # DBF column. The block takes no parameters because the calls inside
      # it (primary_key, column) are resolved by the schema generator.
      def create_table(name, dbf)
        columns = columns_for(name, dbf)
        @db.create_table(table_name(name)) do
          primary_key :id
          columns.each { |args| column(*args) }
        end
      end

      def columns_for(name, dbf)
        dbf.columns.map { |column| column_for(name, column) }
      end

      # Returns [column name, column type] for one DBF column. Columns
      # listed in UUID for this table become :uuid; remaining :string
      # columns become :text.
      def column_for(name, column)
        alter = UUID[name]
        column_name = column.name.downcase

        parse_c_def(column.schema_definition).tap do |c_def|
          c_def[1] = :uuid if alter && alter.include?(column_name)
          c_def[1] = :text if c_def[1] == :string
        end
      end

      # Extracts [name, type] Symbols from a schema definition string of the
      # form "\"name\", :type, ...": the quotes around the name and the
      # colon in front of the type are dropped, anything after is ignored.
      def parse_c_def(c_def)
        c_def = c_def.strip.split(',').map(&:strip)
        name = c_def[0][1..-2]
        type = c_def[1][1..-1]
        [name, type].map(&:to_sym)
      end

      # column name => :uuid for the table's UUID columns (empty hash for
      # tables without any).
      def uuid_column_types(name)
        uuid = UUID[name] || []
        Hash[uuid.map { |column_name| [column_name, :uuid] }]
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Fias
|
2
|
+
module Name
|
3
|
+
module Append
|
4
|
+
class << self
|
5
|
+
def append(name, short_name)
|
6
|
+
long, _, short, _ = Canonical.canonical(short_name)
|
7
|
+
|
8
|
+
exception = Fias.config.exceptions[Unicode.downcase(name)]
|
9
|
+
return exception.reverse if exception
|
10
|
+
|
11
|
+
[concat(short, name), concat(long, name)]
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def concat(status, name)
|
17
|
+
must_append?(name) ? "#{name} #{status}" : "#{status} #{name}"
|
18
|
+
end
|
19
|
+
|
20
|
+
def must_append?(name)
|
21
|
+
ending = name[-2..-1]
|
22
|
+
ENDINGS_TO_APPEND.include?(ending) || name =~ JUST_NUMBER
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
ENDINGS_TO_APPEND = %w(ая ий ый)
|
27
|
+
JUST_NUMBER = /^\d+([\-А-Яа-яе]{1,3})?$/u
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Fias
  module Name
    # Resolves any known spelling of a designation to its canonical forms.
    module Canonical
      # Long form that gets its forms capitalised by #fix_republic_case.
      REPUBLIC = 'республика'

      class << self
        # Returns the canonical forms for +name+.
        #
        # For an indexed spelling the result is
        # [long, short without trailing dot, short, *aliases]; for an
        # exception it is the stored exception entry.
        #
        # Raises RuntimeError when +name+ is neither indexed nor an
        # exception.
        def canonical(name)
          found = search(name) || search_exception(name)
          fail("Unknown abbrevation: #{name}") unless found

          fix_republic_case(found)
        end

        private

        # Looks the lower-cased key up in the config index and expands the
        # hit into its forms; nil when the key is not indexed.
        def search(key)
          long = Fias.config.index[Unicode.downcase(key)]
          return nil unless long

          short = short_for(long)
          forms = [long, short.gsub(/\.$/, ''), short, aliases_for(long)]
          forms.flatten.compact
        end

        def short_for(long)
          Fias.config.shorts[Unicode.downcase(long)]
        end

        def aliases_for(long)
          Fias.config.aliases[Unicode.downcase(long)]
        end

        def search_exception(name)
          Fias.config.exceptions[Unicode.downcase(name)]
        end

        # Upper-cases the first character of every form when the long form
        # equals REPUBLIC; other results are returned untouched.
        def fix_republic_case(canonical)
          if canonical[0] == REPUBLIC
            canonical.map { |form| Unicode.upcase(form[0]) + form[1..-1] }
          else
            canonical
          end
        end
      end
    end
  end
end
|