fias 0.0.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -22
- data/.rubocop.yml +7 -0
- data/.travis.yml +10 -0
- data/Gemfile +1 -1
- data/LICENSE.txt +2 -2
- data/README.md +259 -155
- data/Rakefile +6 -1
- data/config/names.txt +0 -0
- data/config/synonyms.yml +50 -0
- data/examples/create.rb +106 -0
- data/examples/generate_index.rb +63 -0
- data/fias.gemspec +33 -21
- data/lib/fias.rb +197 -10
- data/lib/fias/config.rb +74 -0
- data/lib/fias/import/copy.rb +62 -0
- data/lib/fias/import/dbf.rb +81 -0
- data/lib/fias/import/download_service.rb +37 -0
- data/lib/fias/import/restore_parent_id.rb +51 -0
- data/lib/fias/import/tables.rb +74 -0
- data/lib/fias/name/append.rb +30 -0
- data/lib/fias/name/canonical.rb +42 -0
- data/lib/fias/name/extract.rb +85 -0
- data/lib/fias/name/house_number.rb +71 -0
- data/lib/fias/name/split.rb +60 -0
- data/lib/fias/name/synonyms.rb +93 -0
- data/lib/fias/query.rb +43 -0
- data/lib/fias/query/estimate.rb +67 -0
- data/lib/fias/query/finder.rb +75 -0
- data/lib/fias/query/params.rb +101 -0
- data/lib/fias/railtie.rb +3 -17
- data/lib/fias/version.rb +1 -1
- data/spec/fixtures/ACTSTAT.DBF +0 -0
- data/spec/fixtures/NORDOC99.DBF +0 -0
- data/spec/fixtures/STRSTAT.DBF +0 -0
- data/spec/fixtures/addressing.yml +93 -0
- data/spec/fixtures/query.yml +79 -0
- data/spec/fixtures/query_sanitization.yml +75 -0
- data/spec/fixtures/status_append.yml +60 -0
- data/spec/lib/import/copy_spec.rb +44 -0
- data/spec/lib/import/dbf_spec.rb +28 -0
- data/spec/lib/import/download_service_spec.rb +15 -0
- data/spec/lib/import/restore_parent_id_spec.rb +34 -0
- data/spec/lib/import/tables_spec.rb +26 -0
- data/spec/lib/name/append_spec.rb +14 -0
- data/spec/lib/name/canonical_spec.rb +20 -0
- data/spec/lib/name/extract_spec.rb +67 -0
- data/spec/lib/name/house_number_spec.rb +45 -0
- data/spec/lib/name/query_spec.rb +21 -0
- data/spec/lib/name/split_spec.rb +15 -0
- data/spec/lib/name/synonyms_spec.rb +51 -0
- data/spec/lib/query/params_spec.rb +15 -0
- data/spec/lib/query_spec.rb +27 -0
- data/spec/spec_helper.rb +30 -0
- data/spec/support/db.rb +30 -0
- data/spec/support/query.rb +13 -0
- data/tasks/db.rake +52 -0
- data/tasks/download.rake +15 -0
- metadata +246 -64
- data/lib/fias/active_record/address_object.rb +0 -231
- data/lib/fias/active_record/address_object_type.rb +0 -15
- data/lib/fias/dbf_wrapper.rb +0 -90
- data/lib/fias/importer.rb +0 -30
- data/lib/fias/importer/base.rb +0 -59
- data/lib/fias/importer/pg.rb +0 -81
- data/lib/fias/importer/sqlite.rb +0 -38
- data/lib/generators/fias/migration.rb +0 -34
- data/lib/generators/fias/templates/create_fias_tables.rb +0 -5
- data/tasks/fias.rake +0 -68
data/lib/fias/config.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module Fias
  # Registry of name data: long/short forms, aliases, exceptions, proper
  # names and synonym groups.
  #
  # NOTE(review): presumably this is the object returned by `Fias.config`
  # (the Name helpers read #index, #shorts, #aliases and #exceptions from
  # it) - confirm against lib/fias.rb.
  class Config
    # index          - lowercase spelling => long name, longest keys first
    # longs          - lowercase short form => long name
    # shorts         - lowercase long name => short form
    # aliases        - lowercase long name => list of aliases
    # exceptions     - lowercase long or short form => [long, short]
    attr_reader :index, :longs, :shorts, :aliases, :exceptions
    # proper_names   - names in the order they were added
    # synonyms       - list of synonym groups
    # synonyms_index - name => the synonym group containing it
    attr_reader :proper_names, :synonyms, :synonyms_index

    # Builds an empty registry, lets the optional block fill it, then
    # orders the index.
    #
    # @yieldparam config [Fias::Config] the registry being built
    def initialize
      @index = {}
      @longs = {}
      @shorts = {}
      @aliases = {}
      @exceptions = {}
      @proper_names = []
      @synonyms = []
      @synonyms_index = {}

      # A block is optional: a bare `Config.new` yields an empty registry
      # instead of raising LocalJumpError.
      yield(self) if block_given?

      finalize_index
    end

    # Registers a long name with its short form and optional aliases.
    #
    # @param long [String] full name
    # @param short [String] abbreviated name
    # @param aliases [Array<String>] alternative spellings
    def add_name(long, short, aliases = [])
      long_key = Unicode.downcase(long)

      @longs[Unicode.downcase(short)] = long
      @shorts[long_key] = short
      @aliases[long_key] = aliases

      populate_index(long, short, aliases)
    end

    # Registers a [long, short] pair under both of its lowercase forms.
    def add_exception(long, short)
      pair = [long, short]
      @exceptions[Unicode.downcase(short)] = pair
      @exceptions[Unicode.downcase(long)] = [long, short]
    end

    # Appends +name+ to the list of proper names.
    def add_proper_name(name)
      @proper_names << name
    end

    # Registers a group of interchangeable names; every member of the
    # group points at the whole group in #synonyms_index.
    def add_synonym(*names)
      @synonyms << names
      populate_synonyms_index(names)
    end

    private

    # Indexes every accepted spelling of a name: the word permutations of
    # the long form, the short form (also without its trailing dot) and
    # all aliases.
    def populate_index(long, short, aliases)
      long_downcase = Unicode.downcase(long)
      short_downcase = Unicode.downcase(short)

      populate_long_permutations(long)

      if long_downcase != short_downcase
        @index[short_downcase] = long
        @index[short_downcase[0..-2]] = long if short_downcase[-1] == '.'
      end

      aliases.each { |al| @index[Unicode.downcase(al)] = long }
    end

    # Indexes the long form under every ordering of its words.
    def populate_long_permutations(long)
      Unicode.downcase(long).split(' ').permutation.each do |variant|
        @index[variant.join(' ')] = long
      end
    end

    # Rebuilds the index so that iteration visits the longest keys first.
    # The pairs are handed to Hash[] as-is: no splat of the whole index
    # into an argument list and no flattening of values.
    def finalize_index
      longest_first = @index.sort_by { |key, _| key.size }.reverse
      @index = Hash[longest_first]
    end

    def populate_synonyms_index(names)
      names.each { |name| @synonyms_index[name] = names }
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Fias
  module Import
    # Streams the records of one DBF table into a database table through
    # PgDataEncoder and the database's #copy_into.
    #
    # NOTE(review): +db+ looks like a Sequel::Database on PostgreSQL (it is
    # sent #[], #run and #copy_into with format: :binary) - confirm.
    class Copy
      # Maximum number of bytes handed to #copy_into per chunk.
      BLOCK_SIZE = 65_536 # 10_240

      attr_reader :dbf, :table_name

      # @param db          database handle (see the class note)
      # @param table_name  [#to_sym] destination table
      # @param dbf         DBF table; must respond to #columns and #each
      # @param types       [Hash] column name => encoder column type; names
      #                    that are not columns of +dbf+ are ignored
      def initialize(db, table_name, dbf, types = {})
        @db = db
        @table_name = table_name.to_sym
        @dbf = dbf
        @encoder = PgDataEncoder::EncodeForCopy.new(
          column_types: map_types(types)
        )
      end

      # Feeds every DBF record to the encoder, turning empty strings into
      # nil. Yields (without arguments) after each record when a block is
      # given, e.g. to drive a progress indicator.
      def encode
        @dbf.each do |record|
          @encoder.add(record.to_a.map { |value| value == '' ? nil : value })
          yield if block_given?
        end
      end

      # Truncates the destination table and loads the encoded data into it.
      # Call #encode first; this method only ships what the encoder holds.
      def copy
        prepare
        copy_into
      end

      private

      # Converts {column name => type} into {column position => type},
      # which is the form passed to the encoder as :column_types.
      def map_types(types)
        types.each_with_object({}) do |(name, type), by_position|
          position = columns.index(name.to_sym)
          by_position[position] = type if position
        end
      end

      # Lowercased DBF column names as symbols, in file order.
      def columns
        @columns ||= @dbf.columns.map { |column| column.name.downcase.to_sym }
      end

      def prepare
        @db[@table_name].truncate
        @db.run('SET client_min_messages TO warning;')
      end

      # Hands the encoder output to the database chunk by chunk; the block
      # returns nil once the IO is exhausted.
      def copy_into
        io = @encoder.get_io

        @db.copy_into(@table_name, columns: columns, format: :binary) do
          begin
            io.readpartial(BLOCK_SIZE)
          rescue EOFError
            nil
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module Fias
  module Import
    # Opens the DBF files found in a FIAS database directory and exposes
    # them keyed by accessor name (see TABLES).
    class Dbf
      # Builds the accessor => file name map for a numbered table family,
      # e.g. n_tables('house') => { house01: 'HOUSE01.DBF', ... } for
      # numbers 01..99.
      #
      # @param title [String] lowercase family name
      # @return [Hash{Symbol => String}]
      def self.n_tables(title)
        pairs = (1..99).map do |n|
          [
            format('%s%0.2d', title, n).to_sym,
            format('%s%0.2d.DBF', title.upcase, n)
          ]
        end

        Hash[pairs]
      end

      HOUSE_TABLES = n_tables('house')
      NORDOC_TABLES = n_tables('nordoc')

      # Accessor name => DBF file name for every known table.
      # NOTE(review): :estate_statues looks like a typo of
      # :estate_statuses; left untouched because the key is part of the
      # public interface.
      TABLES = {
        address_object_types: 'SOCRBASE.DBF',
        current_statuses: 'CURENTST.DBF',
        actual_statuses: 'ACTSTAT.DBF',
        operation_statuses: 'OPERSTAT.DBF',
        center_statuses: 'CENTERST.DBF',
        interval_statuses: 'INTVSTAT.DBF',
        estate_statues: 'ESTSTAT.DBF',
        structure_statuses: 'STRSTAT.DBF',
        address_objects: 'ADDROBJ.DBF',
        house_intervals: 'HOUSEINT.DBF',
        landmarks: 'LANDMARK.DBF'
      }.merge(
        HOUSE_TABLES
      ).merge(
        NORDOC_TABLES
      )

      DEFAULT_ENCODING = Encoding::CP866

      # Accessor name => DBF::Table for every file that exists in the
      # directory.
      attr_reader :files

      # @param path [String] directory holding the *.DBF files
      # @param encoding [Encoding] passed through to DBF::Table.new
      # @raise [ArgumentError] when +path+ is not an existing directory
      def initialize(path, encoding = DEFAULT_ENCODING)
        @path = path
        @files = {}

        unless Dir.exist?(@path)
          fail ArgumentError, "FIAS database path #{@path} does not exists"
        end

        open_files(encoding)
      end

      # Returns the opened tables restricted to +names+ (all of them when
      # called without arguments). The pseudo-names :houses and :nordocs
      # stand for every HOUSE_TABLES / NORDOC_TABLES accessor.
      #
      # Relies on Hash#slice (Ruby 2.5+ or ActiveSupport).
      #
      # @param names [Array<#to_sym>]
      # @return [Hash{Symbol => DBF::Table}]
      def only(*names)
        return @files if names.empty?

        wanted = names.flat_map { |name| expand_group(name.to_sym) }

        @files.slice(*wanted)
      end

      private

      # Expands a group pseudo-name into its accessors; any other name is
      # returned unchanged. Both groups are handled here - previously the
      # :houses expansion was computed and thrown away.
      def expand_group(name)
        case name
        when :houses then HOUSE_TABLES.keys
        when :nordocs then NORDOC_TABLES.keys
        else [name]
        end
      end

      # Opens every known table whose file is present; missing files are
      # skipped silently.
      def open_files(encoding)
        TABLES.each do |accessor, dbf_filename|
          filename = File.join(@path, dbf_filename)

          next unless File.exist?(filename)

          @files[accessor] = DBF::Table.new(filename, nil, encoding)
        end
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Fias
  module Import
    # Asks the FIAS SOAP download service for the address of the latest
    # complete DBF archive.
    module DownloadService
      # Request options passed to HTTParty.post: the SOAP envelope for the
      # GetLastDownloadFileInfo operation and the matching HTTP headers.
      OPTIONS = {
        body: %(<?xml version="1.0" encoding="utf-8"?>
          <soap:Envelope
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
            <soap:Body>
              <GetLastDownloadFileInfo
                xmlns="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/" />
            </soap:Body>
          </soap:Envelope>
        ),
        headers: {
          'SOAPAction' => 'http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/GetLastDownloadFileInfo',
          # The media-type parameter naming the character set is
          # `charset`; `encoding` is not a defined parameter of text/xml.
          'Content-Type' => 'text/xml; charset=utf-8'
        }
      }

      # Performs the SOAP request and extracts the text of the
      # FiasCompleteDbfUrl element from the response.
      #
      # @return [String, nil] the URL, or nil when the response has no
      #   body or no FiasCompleteDbfUrl element
      def url
        response = HTTParty.post(
          'http://fias.nalog.ru/WebServices/Public/DownloadService.asmx',
          OPTIONS
        )

        # to_s guards against a nil body; the non-greedy capture stops at
        # the first closing tag.
        matches =
          response.body.to_s.match(%r{<FiasCompleteDbfUrl>(.*?)</FiasCompleteDbfUrl>})

        matches[1] if matches
      end

      module_function :url
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module Fias
  module Import
    # Fills an integer parent-id column from a GUID-style parent key:
    # every row whose parent key equals another row's key gets that row's
    # id as its parent id.
    #
    # NOTE(review): +scope+ looks like a Sequel dataset (it is sent
    # #select_map, #where and #update) - confirm.
    class RestoreParentId
      # @param scope    dataset to read from and update
      # @param options  [Hash] column names:
      #   :key        (default :aoguid)     - column identifying a row
      #   :parent_key (default :parentguid) - column holding the parent's key
      #   :id         (default :id)         - primary key column
      #   :parent_id  (default :parent_id)  - column to be filled
      def initialize(scope, options = {})
        @scope = scope
        @key = options.fetch(:key, :aoguid)
        @parent_key = options.fetch(:parent_key, :parentguid)
        @id = options.fetch(:id, :id)
        @parent_id = options.fetch(:parent_id, :parent_id)
      end

      # Issues one UPDATE per distinct parent, using the configured :id
      # and :parent_id column names. Rows without a resolvable parent are
      # grouped under nil and get the parent column set to nil.
      def restore
        id_grouped_by_parent_id.each do |parent_id, ids|
          @scope.where(@id => ids).update(@parent_id => parent_id)
        end
      end

      private

      # All rows as [id, key, parent_key] triples, loaded once.
      def records
        @records ||= @scope.select_map([@id, @key, @parent_key])
      end

      # key => row lookup; on duplicate keys the last row wins.
      def records_by_key
        @records_by_key ||= records.each_with_object({}) do |row, by_key|
          by_key[row[1]] = row
        end
      end

      # [id, parent id] for every row; the parent id is nil when the row
      # has no parent key or the parent key matches no row.
      def id_parent_id_tuples
        records.map do |id, _key, parent_key|
          parent = records_by_key[parent_key] if parent_key

          [id, parent && parent[0]]
        end
      end

      # parent id => ids of its children.
      def id_grouped_by_parent_id
        id_parent_id_tuples.each_with_object({}) do |(id, parent_id), grouped|
          (grouped[parent_id] ||= []) << id
        end
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Fias
  module Import
    # Creates database tables that mirror a set of DBF files and builds
    # the Copy objects that load them.
    #
    # NOTE(review): +db+ looks like a Sequel::Database (#create_table with
    # a schema block); #blank? comes from ActiveSupport - confirm both.
    class Tables
      # Accessor name => columns stored as uuid instead of text.
      UUID = {
        address_objects: %w(aoguid aoid previd nextid parentguid)
      }

      DEFAULT_PREFIX = 'fias'

      attr_reader :files

      # @param db      database handle
      # @param files   [Hash] accessor name => DBF table
      # @param prefix  [String, nil] table name prefix; blank means none
      def initialize(db, files, prefix = DEFAULT_PREFIX)
        @db = db
        @files = files
        @prefix = prefix
      end

      # Creates one table per non-blank entry of #files.
      def create
        @files.each do |name, dbf|
          next if dbf.blank?
          create_table(name, dbf)
        end
      end

      # Builds a Copy for every non-blank entry of #files. Blank entries
      # are skipped exactly as in #create; they have no table to load and
      # would otherwise fail inside Copy.new.
      #
      # @return [Array<Copy>]
      def copy
        @files.reject { |_, dbf| dbf.blank? }.map do |name, dbf|
          Copy.new(@db, table_name(name), dbf, uuid_column_types(name))
        end
      end

      private

      # Prefix and accessor joined with '_'; blank parts are dropped.
      def table_name(name)
        [@prefix, name].delete_if(&:blank?).join('_').to_sym
      end

      # Creates the table with an :id primary key followed by one column
      # per DBF column.
      def create_table(name, dbf)
        columns = columns_for(name, dbf)
        @db.create_table(table_name(name)) do
          primary_key :id
          columns.each { |args| column(*args) }
        end
      end

      def columns_for(name, dbf)
        dbf.columns.map { |column| column_for(name, column) }
      end

      # [column name, column type] for one DBF column: uuid for the
      # columns listed in UUID, text for any remaining string column, the
      # DBF-declared type otherwise.
      def column_for(name, column)
        alter = UUID[name]
        column_name = column.name.downcase

        parse_c_def(column.schema_definition).tap do |c_def|
          c_def[1] = :uuid if alter && alter.include?(column_name)
          c_def[1] = :text if c_def[1] == :string
        end
      end

      # Extracts [name, type] from a schema definition string.
      # NOTE(review): assumes the form `"name", :type[, options]` - the
      # first field is stripped of its surrounding quotes and the second
      # of its leading colon; confirm against DBF::Column#schema_definition.
      def parse_c_def(c_def)
        fields = c_def.strip.split(',').map(&:strip)
        name = fields[0][1..-2]
        type = fields[1][1..-1]
        [name, type].map(&:to_sym)
      end

      # column name => :uuid for the uuid columns of the given table.
      def uuid_column_types(name)
        Hash[(UUID[name] || []).map { |column| [column, :uuid] }]
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Fias
|
2
|
+
module Name
|
3
|
+
module Append
|
4
|
+
class << self
|
5
|
+
def append(name, short_name)
|
6
|
+
long, _, short, _ = Canonical.canonical(short_name)
|
7
|
+
|
8
|
+
exception = Fias.config.exceptions[Unicode.downcase(name)]
|
9
|
+
return exception.reverse if exception
|
10
|
+
|
11
|
+
[concat(short, name), concat(long, name)]
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def concat(status, name)
|
17
|
+
must_append?(name) ? "#{name} #{status}" : "#{status} #{name}"
|
18
|
+
end
|
19
|
+
|
20
|
+
def must_append?(name)
|
21
|
+
ending = name[-2..-1]
|
22
|
+
ENDINGS_TO_APPEND.include?(ending) || name =~ JUST_NUMBER
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
ENDINGS_TO_APPEND = %w(ая ий ый)
|
27
|
+
JUST_NUMBER = /^\d+([\-А-Яа-яе]{1,3})?$/u
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Fias
  module Name
    # Resolves any known spelling of a status name to its canonical
    # forms, using the tables held by Fias.config.
    module Canonical
      # Lowercase long name whose forms are returned capitalized.
      REPUBLIC = 'республика'

      class << self
        # @param name [String] any indexed spelling, or an exception name
        # @return [Array<String>] for an indexed name:
        #   [long, short without trailing dot, short, *aliases];
        #   for an exception: the pair stored in Fias.config.exceptions
        # @raise [RuntimeError] when the name is neither indexed nor an
        #   exception
        def canonical(name)
          result = search(name) || search_exception(name)
          fail("Unknown abbrevation: #{name}") unless result
          fix_republic_case(result)
        end

        private

        # Looks the name up in the index and collects its forms; nil when
        # the name is not indexed.
        def search(key)
          long = Fias.config.index[Unicode.downcase(key)]
          return nil unless long

          short = short_for(long)
          # Only a dot that ends the whole string is dropped.
          short_stripped = short.sub(/\.\z/, '')
          [long, short_stripped, short, aliases_for(long)].flatten.compact
        end

        def short_for(long)
          Fias.config.shorts[Unicode.downcase(long)]
        end

        def aliases_for(long)
          Fias.config.aliases[Unicode.downcase(long)]
        end

        def search_exception(name)
          Fias.config.exceptions[Unicode.downcase(name)]
        end

        # Capitalizes the first letter of every form when the long name is
        # REPUBLIC; returns the input untouched otherwise.
        def fix_republic_case(canonical)
          return canonical unless canonical[0] == REPUBLIC
          canonical.map { |n| Unicode.upcase(n[0]) + n[1..-1] }
        end
      end
    end
  end
end
|