fias 0.0.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -22
  3. data/.rubocop.yml +7 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +1 -1
  6. data/LICENSE.txt +2 -2
  7. data/README.md +259 -155
  8. data/Rakefile +6 -1
  9. data/config/names.txt +0 -0
  10. data/config/synonyms.yml +50 -0
  11. data/examples/create.rb +106 -0
  12. data/examples/generate_index.rb +63 -0
  13. data/fias.gemspec +33 -21
  14. data/lib/fias.rb +197 -10
  15. data/lib/fias/config.rb +74 -0
  16. data/lib/fias/import/copy.rb +62 -0
  17. data/lib/fias/import/dbf.rb +81 -0
  18. data/lib/fias/import/download_service.rb +37 -0
  19. data/lib/fias/import/restore_parent_id.rb +51 -0
  20. data/lib/fias/import/tables.rb +74 -0
  21. data/lib/fias/name/append.rb +30 -0
  22. data/lib/fias/name/canonical.rb +42 -0
  23. data/lib/fias/name/extract.rb +85 -0
  24. data/lib/fias/name/house_number.rb +71 -0
  25. data/lib/fias/name/split.rb +60 -0
  26. data/lib/fias/name/synonyms.rb +93 -0
  27. data/lib/fias/query.rb +43 -0
  28. data/lib/fias/query/estimate.rb +67 -0
  29. data/lib/fias/query/finder.rb +75 -0
  30. data/lib/fias/query/params.rb +101 -0
  31. data/lib/fias/railtie.rb +3 -17
  32. data/lib/fias/version.rb +1 -1
  33. data/spec/fixtures/ACTSTAT.DBF +0 -0
  34. data/spec/fixtures/NORDOC99.DBF +0 -0
  35. data/spec/fixtures/STRSTAT.DBF +0 -0
  36. data/spec/fixtures/addressing.yml +93 -0
  37. data/spec/fixtures/query.yml +79 -0
  38. data/spec/fixtures/query_sanitization.yml +75 -0
  39. data/spec/fixtures/status_append.yml +60 -0
  40. data/spec/lib/import/copy_spec.rb +44 -0
  41. data/spec/lib/import/dbf_spec.rb +28 -0
  42. data/spec/lib/import/download_service_spec.rb +15 -0
  43. data/spec/lib/import/restore_parent_id_spec.rb +34 -0
  44. data/spec/lib/import/tables_spec.rb +26 -0
  45. data/spec/lib/name/append_spec.rb +14 -0
  46. data/spec/lib/name/canonical_spec.rb +20 -0
  47. data/spec/lib/name/extract_spec.rb +67 -0
  48. data/spec/lib/name/house_number_spec.rb +45 -0
  49. data/spec/lib/name/query_spec.rb +21 -0
  50. data/spec/lib/name/split_spec.rb +15 -0
  51. data/spec/lib/name/synonyms_spec.rb +51 -0
  52. data/spec/lib/query/params_spec.rb +15 -0
  53. data/spec/lib/query_spec.rb +27 -0
  54. data/spec/spec_helper.rb +30 -0
  55. data/spec/support/db.rb +30 -0
  56. data/spec/support/query.rb +13 -0
  57. data/tasks/db.rake +52 -0
  58. data/tasks/download.rake +15 -0
  59. metadata +246 -64
  60. data/lib/fias/active_record/address_object.rb +0 -231
  61. data/lib/fias/active_record/address_object_type.rb +0 -15
  62. data/lib/fias/dbf_wrapper.rb +0 -90
  63. data/lib/fias/importer.rb +0 -30
  64. data/lib/fias/importer/base.rb +0 -59
  65. data/lib/fias/importer/pg.rb +0 -81
  66. data/lib/fias/importer/sqlite.rb +0 -38
  67. data/lib/generators/fias/migration.rb +0 -34
  68. data/lib/generators/fias/templates/create_fias_tables.rb +0 -5
  69. data/tasks/fias.rake +0 -68
@@ -0,0 +1,74 @@
module Fias
  # Registry of status names (long form, short form, aliases), exceptions,
  # proper names and synonyms, together with the lookup indexes derived
  # from them.
  #
  # The new instance is yielded to the block passed to +new+ so the caller
  # can populate it; once the block returns, the name index is re-ordered so
  # that longer keys come first.
  class Config
    attr_reader :index, :longs, :shorts, :aliases, :exceptions
    attr_reader :proper_names, :synonyms, :synonyms_index

    def initialize
      @index = {}
      @longs = {}
      @shorts = {}
      @aliases = {}
      @exceptions = {}
      @proper_names = []
      @synonyms = []
      @synonyms_index = {}

      # A bare `yield` raises LocalJumpError when no block is given; an
      # unpopulated config is still a valid (empty) object.
      yield(self) if block_given?

      finalize_index
    end

    # Registers a name.
    #
    # long    - String long form (stored as given).
    # short   - String short form (stored as given).
    # aliases - Array of additional String spellings resolving to +long+.
    def add_name(long, short, aliases = [])
      @longs[Unicode.downcase(short)] = long
      @shorts[Unicode.downcase(long)] = short
      @aliases[Unicode.downcase(long)] = aliases

      populate_index(long, short, aliases)
    end

    # Registers a [long, short] pair that is reachable by either spelling
    # (downcased) through #exceptions.
    def add_exception(long, short)
      pair = [long, short]
      @exceptions[Unicode.downcase(short)] = pair
      @exceptions[Unicode.downcase(long)] = pair
    end

    def add_proper_name(name)
      @proper_names << name
    end

    # Registers a group of interchangeable names; every member maps to the
    # whole group in #synonyms_index.
    def add_synonym(*names)
      @synonyms << names
      populate_synonyms_index(names)
    end

    private

    # Adds every spelling of a name to the index: all word orders of the
    # long form, the short form (with and without a trailing dot) and the
    # aliases. All keys are downcased; all values are the long form.
    def populate_index(long, short, aliases)
      long_downcase = Unicode.downcase(long)
      short_downcase = Unicode.downcase(short)

      populate_long_permutations(long)

      if long_downcase != short_downcase
        @index[short_downcase] = long
        @index[short_downcase[0..-2]] = long if short_downcase.end_with?('.')
      end

      aliases.each { |al| @index[Unicode.downcase(al)] = long }
    end

    # Indexes every ordering of the words of the long form.
    def populate_long_permutations(long)
      Unicode.downcase(long).split(' ').permutation.each do |variant|
        @index[variant.join(' ')] = long
      end
    end

    # Rebuilds the index so that iteration visits longer keys first.
    def finalize_index
      longest_first = @index.sort_by { |key, _| key.size }.reverse
      # Build from the pair list directly: splatting a flattened array would
      # pass every key and value as a separate method argument.
      @index = Hash[longest_first]
    end

    def populate_synonyms_index(names)
      names.each { |name| @synonyms_index[name] = names }
    end
  end
end
@@ -0,0 +1,62 @@
module Fias
  module Import
    # Loads the records of one DBF table into a database table: records are
    # first encoded with PgDataEncoder (#encode) and then streamed to the
    # database in binary COPY format (#copy).
    class Copy
      # Maximum number of bytes handed to the database per chunk.
      BLOCK_SIZE = 65_536 # 10_240

      attr_reader :dbf, :table_name

      # db         - database handle; must respond to #[], #run and
      #              #copy_into (presumably a Sequel PostgreSQL database).
      # table_name - String or Symbol name of the target table.
      # dbf        - source table; must respond to #columns and #each.
      # types      - Hash of column name => encoder type overrides. Names
      #              that are not columns of +dbf+ are ignored.
      def initialize(db, table_name, dbf, types = {})
        @db = db
        @table_name = table_name.to_sym
        @dbf = dbf
        @encoder = PgDataEncoder::EncodeForCopy.new(
          column_types: map_types(types)
        )
      end

      # Feeds every record into the encoder, turning empty strings into nil.
      # The optional block is called once per record read (handy for
      # progress reporting).
      def encode
        @dbf.each do |record|
          # The dbf gem yields nil for records flagged as deleted; encoding
          # `nil.to_a` would push an empty row into the COPY stream.
          @encoder.add(encode_record(record)) if record
          yield if block_given?
        end
      end

      # Empties the target table and streams the encoded data into it.
      def copy
        prepare
        copy_into
      end

      private

      def encode_record(record)
        record.to_a.map { |value| value == '' ? nil : value }
      end

      # Converts a column name => type hash into the column position => type
      # hash that is passed to the encoder.
      def map_types(types)
        pairs = types.map do |name, type|
          position = columns.index(name.to_sym)
          [position, type] if position
        end
        Hash[pairs.compact]
      end

      # Downcased DBF column names as symbols, in file order.
      def columns
        @columns ||= @dbf.columns.map(&:name).map(&:downcase).map(&:to_sym)
      end

      def prepare
        @db[@table_name].truncate
        @db.run('SET client_min_messages TO warning;')
      end

      def copy_into
        io = @encoder.get_io

        # The block supplies successive chunks of data; returning nil once
        # the IO is exhausted signals the end of the stream.
        @db.copy_into(@table_name, columns: columns, format: :binary) do
          begin
            io.readpartial(BLOCK_SIZE)
          rescue EOFError
            nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
module Fias
  module Import
    # Opens the DBF files found in a directory and exposes them as a Hash
    # keyed by accessor name (see TABLES).
    class Dbf
      # path     - directory containing the DBF files.
      # encoding - encoding passed to DBF::Table for every file.
      #
      # Raises ArgumentError when +path+ is not an existing directory.
      def initialize(path, encoding = DEFAULT_ENCODING)
        @path = path
        @files = {}

        unless Dir.exist?(@path)
          fail ArgumentError, "FIAS database path #{@path} does not exists"
        end

        open_files(encoding)
      end

      # Returns the opened files restricted to the given accessor names, or
      # all of them when no name is given. Names may be Strings or Symbols;
      # :houses and :nordocs expand to every HOUSE_TABLES / NORDOC_TABLES
      # accessor.
      def only(*names)
        return @files if names.empty?

        wanted = names.flat_map do |name|
          # One expression decides the result for each name. Previously the
          # :houses expansion was computed and then thrown away by the
          # following :nordocs check.
          case name.to_sym
          when :houses then HOUSE_TABLES.keys
          when :nordocs then NORDOC_TABLES.keys
          else [name.to_sym]
          end
        end

        @files.slice(*wanted)
      end

      attr_reader :files

      private

      # Opens every file listed in TABLES that exists under @path.
      def open_files(encoding)
        TABLES.each do |accessor, dbf_filename|
          filename = File.join(@path, dbf_filename)

          next unless File.exist?(filename)

          dbf = DBF::Table.new(filename, nil, encoding)
          @files[accessor] = dbf if dbf
        end
      end

      # Builds the accessor => file name map for a numbered table family,
      # e.g. n_tables('house') => { house01: 'HOUSE01.DBF', ... } for
      # numbers 1 through 99.
      def self.n_tables(title)
        pairs = (1..99).map do |n|
          [
            format('%s%0.2d', title, n).to_sym,
            format('%s%0.2d.DBF', title.upcase, n)
          ]
        end

        Hash[pairs]
      end

      HOUSE_TABLES = n_tables('house')
      NORDOC_TABLES = n_tables('nordoc')

      # NOTE(review): `estate_statues` looks like a misspelling of
      # `estate_statuses`; it is kept because the key is a public accessor
      # name.
      TABLES = {
        address_object_types: 'SOCRBASE.DBF',
        current_statuses: 'CURENTST.DBF',
        actual_statuses: 'ACTSTAT.DBF',
        operation_statuses: 'OPERSTAT.DBF',
        center_statuses: 'CENTERST.DBF',
        interval_statuses: 'INTVSTAT.DBF',
        estate_statues: 'ESTSTAT.DBF',
        structure_statuses: 'STRSTAT.DBF',
        address_objects: 'ADDROBJ.DBF',
        house_intervals: 'HOUSEINT.DBF',
        landmarks: 'LANDMARK.DBF'
      }.merge(
        HOUSE_TABLES
      ).merge(
        NORDOC_TABLES
      )

      DEFAULT_ENCODING = Encoding::CP866
    end
  end
end
@@ -0,0 +1,37 @@
module Fias
  module Import
    # Asks the FIAS download web service (SOAP) where the latest complete
    # DBF archive can be fetched from.
    module DownloadService
      ENDPOINT = 'http://fias.nalog.ru/WebServices/Public/DownloadService.asmx'

      # Non-greedy so that only the content of the first element is
      # captured even when several elements share one line.
      URL_PATTERN = %r{<FiasCompleteDbfUrl>(.*?)</FiasCompleteDbfUrl>}

      # Returns the content of the first <FiasCompleteDbfUrl> element of the
      # service response as a String, or nil when the response has none.
      def url
        response = HTTParty.post(ENDPOINT, OPTIONS)

        matches = response.body.to_s.match(URL_PATTERN)

        matches[1] if matches
      end

      # Request body and headers for the GetLastDownloadFileInfo call.
      OPTIONS = {
        body: %(<?xml version="1.0" encoding="utf-8"?>
          <soap:Envelope
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
            xmlns:xsd="http://www.w3.org/2001/XMLSchema"
            xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
            <soap:Body>
              <GetLastDownloadFileInfo
                xmlns="http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/" />
            </soap:Body>
          </soap:Envelope>
        ),
        headers: {
          'SOAPAction' => 'http://fias.nalog.ru/WebServices/Public/DownloadService.asmx/GetLastDownloadFileInfo',
          # The media-type parameter that declares the character set is
          # `charset`; `encoding` is not a defined parameter.
          'Content-Type' => 'text/xml; charset=utf-8'
        }
      }

      module_function :url
    end
  end
end
@@ -0,0 +1,51 @@
module Fias
  module Import
    # Fills a numeric parent reference column from key-based parent links:
    # each row whose parent key equals another row's key receives that
    # row's id; rows without a resolvable parent receive nil.
    class RestoreParentId
      # scope   - dataset responding to #select_map and #where(...).update
      #           (presumably a Sequel dataset).
      # options - Hash of column names:
      #           :key        - row key column (default :aoguid)
      #           :parent_key - parent key column (default :parentguid)
      #           :id         - id column (default :id)
      #           :parent_id  - column to fill (default :parent_id)
      def initialize(scope, options = {})
        @scope = scope
        @key = options.fetch(:key, :aoguid)
        @parent_key = options.fetch(:parent_key, :parentguid)
        @id = options.fetch(:id, :id)
        @parent_id = options.fetch(:parent_id, :parent_id)
      end

      # Issues one UPDATE per distinct parent id.
      def restore
        id_grouped_by_parent_id.each do |parent_id, ids|
          # Use the configured column names; the literal :id / :parent_id
          # keys made the :id and :parent_id options ineffective here.
          @scope.where(@id => ids).update(@parent_id => parent_id)
        end
      end

      private

      # Rows of [id, key, parent_key].
      def records
        @records ||= @scope.select_map([@id, @key, @parent_key])
      end

      # Rows indexed by their key; for duplicate keys the last row wins.
      def records_by_key
        @records_by_key ||= records.each_with_object({}) do |row, by_key|
          by_key[row[1]] = row
        end
      end

      # Pairs of [id, id of the parent row or nil].
      def id_parent_id_tuples
        records.map do |(id, _key, parent_key)|
          parent = records_by_key[parent_key] if parent_key
          [id, parent && parent[0]]
        end
      end

      # Hash of parent id (or nil) => Array of child ids.
      def id_grouped_by_parent_id
        id_parent_id_tuples.each_with_object({}) do |(id, parent_id), groups|
          (groups[parent_id] ||= []) << id
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
module Fias
  module Import
    # Creates database tables that mirror DBF files and builds the Copy
    # objects that load them.
    class Tables
      # db     - database handle responding to #create_table with a schema
      #          block (presumably a Sequel database).
      # files  - Hash of accessor name => DBF table.
      # prefix - table name prefix; a blank prefix yields unprefixed names.
      def initialize(db, files, prefix = DEFAULT_PREFIX)
        @db = db
        @files = files
        @prefix = prefix
      end

      attr_reader :files

      # Creates one table per non-blank DBF entry.
      def create
        @files.each do |name, dbf|
          next if dbf.blank?
          create_table(name, dbf)
        end
      end

      # Returns an Array with one Copy per non-blank DBF entry. Blank
      # entries are skipped exactly as in #create; a nil entry would
      # otherwise fail inside Copy.new.
      def copy
        present = @files.reject { |_, dbf| dbf.blank? }
        present.map do |name, dbf|
          Copy.new(@db, table_name(name), dbf, uuid_column_types(name))
        end
      end

      private

      # Joins the prefix and the accessor name with an underscore.
      def table_name(name)
        [@prefix, name].delete_if(&:blank?).join('_').to_sym
      end

      def create_table(name, dbf)
        columns = columns_for(name, dbf)
        @db.create_table(table_name(name)) do
          primary_key :id
          columns.each { |args| column(*args) }
        end
      end

      def columns_for(name, dbf)
        dbf.columns.map { |column| column_for(name, column) }
      end

      # Returns [column name, column type] for one DBF column. Columns
      # listed in UUID for this table become :uuid; :string becomes :text.
      def column_for(name, column)
        uuid_columns = UUID[name]
        column_name = column.name.downcase

        parse_c_def(column.schema_definition).tap do |c_def|
          c_def[1] = :uuid if uuid_columns && uuid_columns.include?(column_name)
          c_def[1] = :text if c_def[1] == :string
        end
      end

      # Extracts [name, type] symbols from a schema definition string.
      # Assumes the `"name", :type[, options]` shape: the first and last
      # characters of the name field and the first character of the type
      # field are dropped.
      def parse_c_def(c_def)
        fields = c_def.strip.split(',').map(&:strip)
        name = fields[0][1..-2]
        type = fields[1][1..-1]
        [name, type].map(&:to_sym)
      end

      # Column name => :uuid overrides handed to Copy for the given table.
      def uuid_column_types(name)
        uuid_columns = UUID[name] || []
        Hash[uuid_columns.map { |column| [column, :uuid] }]
      end

      # Columns stored as uuid, per table accessor.
      UUID = {
        address_objects: %w(aoguid aoid previd nextid parentguid)
      }

      DEFAULT_PREFIX = 'fias'
    end
  end
end
@@ -0,0 +1,30 @@
1
+ module Fias
2
+ module Name
3
+ module Append
4
+ class << self
5
+ def append(name, short_name)
6
+ long, _, short, _ = Canonical.canonical(short_name)
7
+
8
+ exception = Fias.config.exceptions[Unicode.downcase(name)]
9
+ return exception.reverse if exception
10
+
11
+ [concat(short, name), concat(long, name)]
12
+ end
13
+
14
+ private
15
+
16
+ def concat(status, name)
17
+ must_append?(name) ? "#{name} #{status}" : "#{status} #{name}"
18
+ end
19
+
20
+ def must_append?(name)
21
+ ending = name[-2..-1]
22
+ ENDINGS_TO_APPEND.include?(ending) || name =~ JUST_NUMBER
23
+ end
24
+ end
25
+
26
+ ENDINGS_TO_APPEND = %w(ая ий ый)
27
+ JUST_NUMBER = /^\d+([\-А-Яа-яе]{1,3})?$/u
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,42 @@
module Fias
  module Name
    # Resolves any known spelling of a status name to its canonical forms.
    module Canonical
      class << self
        # name - String status name in any registered spelling.
        #
        # Returns an Array: [long, short without trailing dot, short,
        # *aliases] for indexed names, or a copy of the registered
        # [long, short] pair for exceptions. When the long form equals
        # REPUBLIC every entry gets its first letter upcased.
        #
        # Raises RuntimeError when the name is unknown.
        def canonical(name)
          result = search(name) || search_exception(name)
          result || fail("Unknown abbrevation: #{name}")
          fix_republic_case(result)
        end

        private

        def search(key)
          long = Fias.config.index[Unicode.downcase(key)]
          return nil unless long
          # Look the short form up once and derive the dot-less variant.
          short = short_for(long)
          [long, short.chomp('.'), short, aliases_for(long)].flatten.compact
        end

        def short_for(long)
          Fias.config.shorts[Unicode.downcase(long)]
        end

        def aliases_for(long)
          Fias.config.aliases[Unicode.downcase(long)]
        end

        # Returns a copy so callers cannot mutate the array stored in the
        # configuration.
        def search_exception(name)
          pair = Fias.config.exceptions[Unicode.downcase(name)]
          pair && pair.dup
        end

        def fix_republic_case(canonical)
          return canonical unless canonical[0] == REPUBLIC
          canonical.map do |entry|
            # An empty entry has no first letter to upcase.
            entry.empty? ? entry : Unicode.upcase(entry[0]) + entry[1..-1]
          end
        end
      end

      REPUBLIC = 'республика'
    end
  end
end