fias 0.0.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -22
  3. data/.rubocop.yml +7 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE.txt +2 -2
  7. data/README.md +259 -155
  8. data/Rakefile +6 -1
  9. data/config/names.txt +0 -0
  10. data/config/synonyms.yml +50 -0
  11. data/examples/create.rb +106 -0
  12. data/examples/generate_index.rb +63 -0
  13. data/fias.gemspec +33 -21
  14. data/lib/fias.rb +197 -10
  15. data/lib/fias/config.rb +74 -0
  16. data/lib/fias/import/copy.rb +62 -0
  17. data/lib/fias/import/dbf.rb +81 -0
  18. data/lib/fias/import/download_service.rb +37 -0
  19. data/lib/fias/import/restore_parent_id.rb +51 -0
  20. data/lib/fias/import/tables.rb +74 -0
  21. data/lib/fias/name/append.rb +30 -0
  22. data/lib/fias/name/canonical.rb +42 -0
  23. data/lib/fias/name/extract.rb +85 -0
  24. data/lib/fias/name/house_number.rb +71 -0
  25. data/lib/fias/name/split.rb +60 -0
  26. data/lib/fias/name/synonyms.rb +93 -0
  27. data/lib/fias/query.rb +43 -0
  28. data/lib/fias/query/estimate.rb +67 -0
  29. data/lib/fias/query/finder.rb +75 -0
  30. data/lib/fias/query/params.rb +101 -0
  31. data/lib/fias/railtie.rb +3 -17
  32. data/lib/fias/version.rb +1 -1
  33. data/spec/fixtures/ACTSTAT.DBF +0 -0
  34. data/spec/fixtures/NORDOC99.DBF +0 -0
  35. data/spec/fixtures/STRSTAT.DBF +0 -0
  36. data/spec/fixtures/addressing.yml +93 -0
  37. data/spec/fixtures/query.yml +79 -0
  38. data/spec/fixtures/query_sanitization.yml +75 -0
  39. data/spec/fixtures/status_append.yml +60 -0
  40. data/spec/lib/import/copy_spec.rb +44 -0
  41. data/spec/lib/import/dbf_spec.rb +28 -0
  42. data/spec/lib/import/download_service_spec.rb +15 -0
  43. data/spec/lib/import/restore_parent_id_spec.rb +34 -0
  44. data/spec/lib/import/tables_spec.rb +26 -0
  45. data/spec/lib/name/append_spec.rb +14 -0
  46. data/spec/lib/name/canonical_spec.rb +20 -0
  47. data/spec/lib/name/extract_spec.rb +67 -0
  48. data/spec/lib/name/house_number_spec.rb +45 -0
  49. data/spec/lib/name/query_spec.rb +21 -0
  50. data/spec/lib/name/split_spec.rb +15 -0
  51. data/spec/lib/name/synonyms_spec.rb +51 -0
  52. data/spec/lib/query/params_spec.rb +15 -0
  53. data/spec/lib/query_spec.rb +27 -0
  54. data/spec/spec_helper.rb +30 -0
  55. data/spec/support/db.rb +30 -0
  56. data/spec/support/query.rb +13 -0
  57. data/tasks/db.rake +52 -0
  58. data/tasks/download.rake +15 -0
  59. metadata +246 -64
  60. data/lib/fias/active_record/address_object.rb +0 -231
  61. data/lib/fias/active_record/address_object_type.rb +0 -15
  62. data/lib/fias/dbf_wrapper.rb +0 -90
  63. data/lib/fias/importer.rb +0 -30
  64. data/lib/fias/importer/base.rb +0 -59
  65. data/lib/fias/importer/pg.rb +0 -81
  66. data/lib/fias/importer/sqlite.rb +0 -38
  67. data/lib/generators/fias/migration.rb +0 -34
  68. data/lib/generators/fias/templates/create_fias_tables.rb +0 -5
  69. data/tasks/fias.rake +0 -68
@@ -0,0 +1,74 @@
1
module Fias
  # Registry of naming rules collected through a configuration block:
  # long/short status names with their aliases, exceptions, proper names
  # and synonym groups.
  #
  # All lookup hashes are keyed by the lowercased (Unicode.downcase) name.
  class Config
    attr_reader :index, :longs, :shorts, :aliases, :exceptions
    attr_reader :proper_names, :synonyms, :synonyms_index

    # Creates empty registries, yields the config to the given block so the
    # caller can register names, then finalizes the lookup index.
    #
    # The block is optional: without it an empty config is produced.
    def initialize
      @index = {}
      @longs = {}
      @shorts = {}
      @aliases = {}
      @exceptions = {}
      @proper_names = []
      @synonyms = []
      @synonyms_index = {}

      yield(self) if block_given?

      finalize_index
    end

    # Registers a name pair.
    #
    # long    - full form of the name.
    # short   - abbreviated form (may end with a dot).
    # aliases - optional list of alternative spellings.
    def add_name(long, short, aliases = [])
      long_key = Unicode.downcase(long)

      @longs[Unicode.downcase(short)] = long
      @shorts[long_key] = short
      @aliases[long_key] = aliases

      populate_index(long, short, aliases)
    end

    # Registers an exception; the [long, short] pair is reachable by either
    # of its lowercased forms.
    def add_exception(long, short)
      pair_by_short = [long, short]
      pair_by_long = [long, short]

      @exceptions[Unicode.downcase(short)] = pair_by_short
      @exceptions[Unicode.downcase(long)] = pair_by_long
    end

    # Appends a proper name to the list, as given.
    def add_proper_name(name)
      @proper_names << name
    end

    # Registers a group of synonyms; every member of the group maps to the
    # whole group in synonyms_index.
    def add_synonym(*names)
      @synonyms << names
      populate_synonyms_index(names)
    end

    private

    # Fills the index with every spelling that should resolve to +long+:
    # word permutations of the long form, the short form (also without its
    # trailing dot) and all aliases.
    def populate_index(long, short, aliases)
      long_downcase = Unicode.downcase(long)
      short_downcase = Unicode.downcase(short)

      populate_long_permutations(long)

      # The short form is indexed only when it differs from the long one.
      if long_downcase != short_downcase
        @index[short_downcase] = long
        @index[short_downcase[0..-2]] = long if short_downcase[-1] == '.'
      end

      aliases.each { |al| @index[Unicode.downcase(al)] = long }
    end

    # Indexes every word-order permutation of the lowercased long form.
    def populate_long_permutations(long)
      Unicode.downcase(long).split(' ').permutation.each do |variant|
        @index[variant.join(' ')] = long
      end
    end

    # Rebuilds the index so that the longest keys are iterated first.
    # NOTE(review): presumably so longer spellings are tried before shorter
    # ones by whoever iterates the index; confirm against the callers.
    def finalize_index
      pairs = @index.sort_by { |key, _| key.size }.reverse
      # Build from [key, value] pairs directly instead of splatting a
      # flattened array: no argument-count limits and no dependence on the
      # values not being arrays.
      @index = Hash[pairs]
    end

    def populate_synonyms_index(names)
      names.each { |name| @synonyms_index[name] = names }
    end
  end
end
@@ -0,0 +1,62 @@
1
module Fias
  module Import
    # Streams the records of one DBF table into a database table using the
    # binary COPY format produced by PgDataEncoder.
    class Copy
      # Size of the chunks read from the encoder IO while copying.
      BLOCK_SIZE = 65_536 # 10_240

      attr_reader :dbf, :table_name

      # db         - database handle (must respond to #[], #run, #copy_into).
      # table_name - destination table name (converted to a Symbol).
      # dbf        - DBF table; must respond to #columns and #each.
      # types      - optional { column_name => type } overrides handed to the
      #              encoder; names missing from the DBF are ignored.
      def initialize(db, table_name, dbf, types = {})
        @db = db
        @table_name = table_name.to_sym
        @dbf = dbf
        @encoder = PgDataEncoder::EncodeForCopy.new(
          column_types: map_types(types)
        )
      end

      # Feeds every DBF record to the encoder, replacing empty strings with
      # nil. Yields (without arguments) once per DBF slot when a block is
      # given, so the caller can track progress.
      #
      # DBF::Table#each yields nil for records flagged as deleted; those are
      # skipped instead of being encoded as an empty row, but they are still
      # reported to the progress block.
      def encode
        @dbf.each do |record|
          @encoder.add(record.to_a.map { |v| v == '' ? nil : v }) if record
          yield if block_given?
        end
      end

      # Truncates the destination table and copies the encoded data into it.
      def copy
        prepare
        copy_into
      end

      private

      # Converts { column_name => type } into { column_position => type },
      # dropping names that are not columns of the DBF table.
      def map_types(types)
        types.each_with_object({}) do |(name, type), mapped|
          position = columns.index(name.to_sym)
          mapped[position] = type if position
        end
      end

      # Lowercased DBF column names as symbols, in table order.
      def columns
        @columns ||= @dbf.columns.map(&:name).map(&:downcase).map(&:to_sym)
      end

      def prepare
        @db[@table_name].truncate
        @db.run('SET client_min_messages TO warning;')
      end

      # Hands the encoded stream to the database chunk by chunk; returning
      # nil from the block (on EOF) signals the end of the data.
      def copy_into
        io = @encoder.get_io

        @db.copy_into(@table_name, columns: columns, format: :binary) do
          begin
            io.readpartial(BLOCK_SIZE)
          rescue EOFError
            nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
module Fias
  module Import
    # Opens the known FIAS DBF files found in a directory and exposes them
    # as a { accessor => DBF::Table } hash.
    class Dbf
      # Builds { :"<title>NN" => "<TITLE>NN.DBF" } for NN in 01..99.
      def self.n_tables(title)
        pairs = (1..99).map do |n|
          [
            format('%s%0.2d', title, n).to_sym,
            format('%s%0.2d.DBF', title.upcase, n)
          ]
        end

        Hash[pairs]
      end

      HOUSE_TABLES = n_tables('house')
      NORDOC_TABLES = n_tables('nordoc')

      # Accessor name => DBF file name for every supported table.
      TABLES = {
        address_object_types: 'SOCRBASE.DBF',
        current_statuses: 'CURENTST.DBF',
        actual_statuses: 'ACTSTAT.DBF',
        operation_statuses: 'OPERSTAT.DBF',
        center_statuses: 'CENTERST.DBF',
        interval_statuses: 'INTVSTAT.DBF',
        estate_statues: 'ESTSTAT.DBF',
        structure_statuses: 'STRSTAT.DBF',
        address_objects: 'ADDROBJ.DBF',
        house_intervals: 'HOUSEINT.DBF',
        landmarks: 'LANDMARK.DBF'
      }.merge(
        HOUSE_TABLES
      ).merge(
        NORDOC_TABLES
      )

      DEFAULT_ENCODING = Encoding::CP866

      attr_reader :files

      # path     - directory holding the DBF files.
      # encoding - encoding passed to DBF::Table (CP866 by default).
      #
      # Raises ArgumentError when the directory does not exist.
      def initialize(path, encoding = DEFAULT_ENCODING)
        @path = path
        @files = {}

        unless Dir.exist?(@path)
          fail ArgumentError, "FIAS database path #{@path} does not exists"
        end

        open_files(encoding)
      end

      # Returns the opened files restricted to the given accessor names
      # (Strings or Symbols). With no names, returns all opened files.
      #
      # :houses and :nordocs are group names that expand to every
      # house01..house99 / nordoc01..nordoc99 accessor.
      def only(*names)
        return @files if names.empty?

        wanted = names.flat_map { |name| expand_group(name.to_sym) }

        @files.slice(*wanted)
      end

      private

      # Expands a group name into its table accessors; any other name is
      # returned unchanged.
      def expand_group(name)
        case name
        when :houses then HOUSE_TABLES.keys
        when :nordocs then NORDOC_TABLES.keys
        else name
        end
      end

      # Opens every known DBF file that is present in the directory;
      # missing files are silently skipped.
      def open_files(encoding)
        TABLES.each do |accessor, dbf_filename|
          filename = File.join(@path, dbf_filename)

          next unless File.exist?(filename)

          dbf = DBF::Table.new(filename, nil, encoding)
          @files[accessor] = dbf if dbf
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Fias
  module Import
    # Asks the FIAS SOAP download service for the URL of the latest complete
    # DBF archive.
    module DownloadService
      # Request options for HTTParty: SOAP envelope plus headers.
      OPTIONS = {
        body: [
          '<?xml version="1.0" encoding="utf-8"?>',
          '<soap:Envelope',
          'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
          'xmlns:xsd="http://www.w3.org/2001/XMLSchema"',
          'xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">',
          '<soap:Body>',
          '<GetLastDownloadFileInfo',
          'xmlns="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/" />',
          '</soap:Body>',
          '</soap:Envelope>',
          ''
        ].join("\n"),
        headers: {
          'SOAPAction' => 'http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/GetLastDownloadFileInfo',
          'Content-Type' => 'text/xml; encoding=utf-8'
        }
      }

      module_function

      # Posts the SOAP request and returns the text of the
      # FiasCompleteDbfUrl element, or nil when the response has none.
      def url
        reply = HTTParty.post(
          'http://fias.nalog.ru/WebServices/Public/DownloadService.asmx',
          OPTIONS
        )

        reply.body[/<FiasCompleteDbfUrl>(.*)<\/FiasCompleteDbfUrl>/, 1]
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module Fias
  module Import
    # Fills an integer-style parent reference column from a key-based
    # parent reference: for every row, looks up the row whose key equals
    # this row's parent key and stores that row's id as the parent id.
    class RestoreParentId
      # scope   - dataset; must respond to #select_map and
      #           #where(...).update(...).
      # options - column names:
      #           :key        - row key column (default :aoguid)
      #           :parent_key - parent key column (default :parentguid)
      #           :id         - id column (default :id)
      #           :parent_id  - column to write to (default :parent_id)
      def initialize(scope, options = {})
        @scope = scope
        @key = options.fetch(:key, :aoguid)
        @parent_key = options.fetch(:parent_key, :parentguid)
        @id = options.fetch(:id, :id)
        @parent_id = options.fetch(:parent_id, :parent_id)
      end

      # Issues one update per distinct parent id (including nil for rows
      # whose parent is absent or unknown), using the configured column
      # names rather than hard-coded :id / :parent_id.
      def restore
        id_grouped_by_parent_id.each do |parent_id, ids|
          @scope.where(@id => ids).update(@parent_id => parent_id)
        end
      end

      private

      # Rows as [id, key, parent_key] triples.
      def records
        @records ||= @scope.select_map([@id, @key, @parent_key])
      end

      # Rows indexed by their key; a later row wins on duplicate keys.
      def records_by_key
        @records_by_key ||= records.each_with_object({}) do |row, by_key|
          by_key[row[1]] = row
        end
      end

      # [id, parent_id] pairs; parent_id is nil when the row has no parent
      # key or the parent key matches no loaded row.
      def id_parent_id_tuples
        records.map do |id, _key, parent_key|
          parent = parent_key && records_by_key[parent_key]
          [id, parent && parent[0]]
        end
      end

      # { parent_id => [id, ...] } in first-seen order.
      def id_grouped_by_parent_id
        id_parent_id_tuples.each_with_object({}) do |(id, parent_id), rows|
          (rows[parent_id] ||= []) << id
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module Fias
  module Import
    # Creates database tables mirroring DBF files and builds Copy jobs that
    # load the DBF data into them.
    class Tables
      # Columns (by table accessor) that are stored as uuid instead of the
      # type declared in the DBF.
      UUID = {
        address_objects: %w(aoguid aoid previd nextid parentguid)
      }

      DEFAULT_PREFIX = 'fias'

      attr_reader :files

      # db     - database handle (must respond to #create_table).
      # files  - { accessor => dbf } hash.
      # prefix - table name prefix; a blank prefix yields unprefixed names.
      def initialize(db, files, prefix = DEFAULT_PREFIX)
        @db = db
        @files = files
        @prefix = prefix
      end

      # Creates one table per non-blank DBF entry.
      def create
        @files.each do |name, dbf|
          create_table(name, dbf) unless dbf.blank?
        end
      end

      # Returns one Copy job per entry of +files+.
      def copy
        @files.map do |name, dbf|
          destination = table_name(name)
          Copy.new(@db, destination, dbf, uuid_column_types(name))
        end
      end

      private

      # "<prefix>_<name>" as a Symbol, omitting blank parts.
      def table_name(name)
        parts = [@prefix, name].reject(&:blank?)
        parts.join('_').to_sym
      end

      # Creates the table with an :id primary key followed by one column
      # per DBF column.
      def create_table(name, dbf)
        definitions = columns_for(name, dbf)
        @db.create_table(table_name(name)) do
          primary_key :id
          definitions.each { |definition| column(*definition) }
        end
      end

      def columns_for(name, dbf)
        dbf.columns.map { |column| column_for(name, column) }
      end

      # Returns [column_name, type] for one DBF column: uuid for the columns
      # listed in UUID, text in place of string, otherwise the DBF type.
      def column_for(name, column)
        uuid_columns = UUID[name]
        lowercase_name = column.name.downcase
        column_name, type = parse_c_def(column.schema_definition)

        type = :uuid if uuid_columns && uuid_columns.include?(lowercase_name)
        type = :text if type == :string

        [column_name, type]
      end

      # Extracts [name, type] symbols from a schema definition string whose
      # first two comma-separated parts look like `"name", :type`; anything
      # after the type is ignored.
      def parse_c_def(c_def)
        parts = c_def.strip.split(',').map(&:strip)
        quoted_name = parts[0]
        type_literal = parts[1]

        # Drop the surrounding quotes and the leading colon.
        [quoted_name[1..-2].to_sym, type_literal[1..-1].to_sym]
      end

      # { column_name => :uuid } for the uuid columns of the given table.
      def uuid_column_types(name)
        names = UUID[name] || []
        Hash[names.map { |column_name| [column_name, :uuid] }]
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ module Fias
2
+ module Name
3
+ module Append
4
+ class << self
5
+ def append(name, short_name)
6
+ long, _, short, _ = Canonical.canonical(short_name)
7
+
8
+ exception = Fias.config.exceptions[Unicode.downcase(name)]
9
+ return exception.reverse if exception
10
+
11
+ [concat(short, name), concat(long, name)]
12
+ end
13
+
14
+ private
15
+
16
+ def concat(status, name)
17
+ must_append?(name) ? "#{name} #{status}" : "#{status} #{name}"
18
+ end
19
+
20
+ def must_append?(name)
21
+ ending = name[-2..-1]
22
+ ENDINGS_TO_APPEND.include?(ending) || name =~ JUST_NUMBER
23
+ end
24
+ end
25
+
26
+ ENDINGS_TO_APPEND = %w(ая ий ый)
27
+ JUST_NUMBER = /^\d+([\-А-Яа-яе]{1,3})?$/u
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,42 @@
1
module Fias
  module Name
    # Resolves any known spelling of a status name into its canonical forms.
    module Canonical
      REPUBLIC = 'республика'

      class << self
        # Returns [long, short without trailing dot, short, *aliases] for a
        # name found in Fias.config.index, or the stored entry from
        # Fias.config.exceptions otherwise.
        #
        # Raises RuntimeError when the name is found in neither.
        def canonical(name)
          found = search(name) || search_exception(name)
          fail("Unknown abbrevation: #{name}") unless found

          fix_republic_case(found)
        end

        private

        # Looks the lowercased key up in the index and assembles the
        # canonical forms; nil when the key is unknown.
        def search(key)
          long = Fias.config.index[Unicode.downcase(key)]
          return nil unless long

          short = short_for(long)
          dotless = short.gsub(/\.$/, '')
          forms = [long, dotless, short, aliases_for(long)]
          forms.flatten.compact
        end

        def short_for(long)
          Fias.config.shorts[Unicode.downcase(long)]
        end

        def aliases_for(long)
          Fias.config.aliases[Unicode.downcase(long)]
        end

        def search_exception(name)
          Fias.config.exceptions[Unicode.downcase(name)]
        end

        # When the first form equals REPUBLIC, returns all forms with their
        # first letter upcased; any other input is returned as is.
        def fix_republic_case(canonical)
          if canonical.first == REPUBLIC
            canonical.map { |form| "#{Unicode.upcase(form[0])}#{form[1..-1]}" }
          else
            canonical
          end
        end
      end
    end
  end
end