kladr 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/BASE/ALTNAMES.DBF.gz +0 -0
- data/BASE/DOMA.DBF +0 -0
- data/BASE/DOMA.DBF.gz +0 -0
- data/BASE/FLAT.DBF.gz +0 -0
- data/BASE/KLADR.DBF.gz +0 -0
- data/BASE/SOCRBASE.DBF.gz +0 -0
- data/BASE/STREET.DBF +0 -0
- data/BASE/STREET.DBF.gz +0 -0
- data/README +0 -0
- data/README.ru +9 -0
- data/Rakefile +32 -0
- data/init.rb +2 -0
- data/kladr.sqlite3 +0 -0
- data/lib/dbf/dbf.rb +6 -0
- data/lib/dbf/dbf/column.rb +54 -0
- data/lib/dbf/dbf/globals.rb +30 -0
- data/lib/dbf/dbf/record.rb +121 -0
- data/lib/dbf/dbf/table.rb +253 -0
- data/lib/kladr.rb +150 -0
- data/test.rb +7 -0
- data/test/houses_test.rb +6 -0
- data/test/test_helper.rb +9 -0
- metadata +80 -0
Binary file
|
data/BASE/DOMA.DBF
ADDED
Binary file
|
data/BASE/DOMA.DBF.gz
ADDED
Binary file
|
data/BASE/FLAT.DBF.gz
ADDED
Binary file
|
data/BASE/KLADR.DBF.gz
ADDED
Binary file
|
Binary file
|
data/BASE/STREET.DBF
ADDED
Binary file
|
data/BASE/STREET.DBF.gz
ADDED
Binary file
|
data/README
ADDED
File without changes
|
data/README.ru
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
Классификатор адресов России (КЛАДР) живет по адресу http://www.gnivc.ru/downloads/kladr.aspx
|
2
|
+
|
3
|
+
Поскольку сами базы там находятся в редком формате ARJ и весят совсем немного, то они
|
4
|
+
прилагаются к этому проекту в сжатом виде в каталоге BASE.
|
5
|
+
|
6
|
+
Библиотека DBF, которая лежит на http://rubyforge.org/projects/dbf, в сыром виде непригодна к импорту больших DBF
|
7
|
+
по причине того, что она при открытии файла вычитывает всю таблицу. Это неприемлемо, поэтому исправленная библиотека
|
8
|
+
прилагается.
|
9
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/gempackagetask'

# Gem specification for the KLADR importer.
spec = Gem::Specification.new do |s|
  s.name = 'kladr'
  s.version = '0.1'
  s.summary = 'Importer of russian classificator of addresses'
  # s.autorequire = 'attacheable'
  s.author = "Max Lapshin"
  s.email = "max@maxidoors.ru"
  s.description = ""
  s.rubyforge_project = "kladr"
  s.has_rdoc = false
  # Package the whole tree except VCS data and the pkg/ build output.
  # Without the pkg exclusion every build embeds the previously built gems.
  s.files = FileList["**/**"].exclude(".git", /\Apkg(\/|\z)/).to_a
end

# Defines the package/gem tasks for the specification above.
Rake::GemPackageTask.new(spec) do |package|
  package.gem_spec = spec
end

task :default => [ :test ]

desc "Run all tests"
Rake::TestTask.new("test") { |t|
  t.libs << "test"
  t.pattern = 'test/*_test.rb'
  t.verbose = true
}
|
data/init.rb
ADDED
data/kladr.sqlite3
ADDED
Binary file
|
data/lib/dbf/dbf.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
class Kladr
  module DBF
    # Raised when a column is declared with a length that is not positive.
    class ColumnLengthError < DBFError; end

    # Metadata for one table column: its cleaned name, one-letter type code,
    # length and decimal count.
    class Column
      attr_reader :name, :type, :length, :decimal

      # Stores the descriptor values; the name is reduced to printable ASCII.
      # Raises ColumnLengthError unless length is greater than zero.
      def initialize(name, type, length, decimal)
        raise ColumnLengthError, "field length must be greater than 0" unless length > 0

        @name = strip_non_ascii_chars(name)
        @type = type
        @length = length
        @decimal = decimal
      end

      # Returns the argument text for a schema "t.column" line: the quoted,
      # underscored column name, the mapped type and a trailing newline.
      def schema_definition
        "\"#{underscore(name)}\", " + schema_type + "\n"
      end

      private

      # Maps the one-letter type code to the schema type text.
      def schema_type
        case type
        when "N" # number
          decimal > 0 ? ":float" : ":integer"
        when "D" # date
          ":datetime"
        when "L" # boolean
          ":boolean"
        when "M" # memo
          ":text"
        else
          ":string, :limit => #{length}"
        end
      end

      # Converts a CamelCase or dashed word to lower-case snake_case.
      def underscore(camel_cased_word)
        word = camel_cased_word.to_s.gsub(/::/, '/')
        word = word.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
        word = word.gsub(/([a-z\d])([A-Z])/,'\1_\2')
        word.tr("-", "_").downcase
      end

      # Keeps only the bytes in the range 32..127 of the given string.
      def strip_non_ascii_chars(s)
        kept = ''
        s.each_byte do |byte|
          kept << byte if (32..127).include?(byte)
        end
        kept
      end
    end

  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class Kladr
  module DBF
    # Byte sizes used when reading table and memo files.
    DBF_HEADER_SIZE = 32
    FPT_HEADER_SIZE = 512
    FPT_BLOCK_HEADER_SIZE = 8

    # Splits an eight-digit date string into year, month and day captures.
    DATE_REGEXP = /([\d]{4})([\d]{2})([\d]{2})/

    # Human-readable description for each two-digit hex version code.
    # Frozen because it is a shared lookup table that must not be mutated.
    VERSION_DESCRIPTIONS = {
      "02" => "FoxBase",
      "03" => "dBase III without memo file",
      "04" => "dBase IV without memo file",
      "05" => "dBase V without memo file",
      "30" => "Visual FoxPro",
      "31" => "Visual FoxPro with AutoIncrement field",
      "7b" => "dBase IV with memo file",
      "83" => "dBase III with memo file",
      "8b" => "dBase IV with memo file",
      "8e" => "dBase IV with SQL table",
      "f5" => "FoxPro with memo file",
      "fb" => "FoxPro without memo file"
    }.freeze

    # Millisecond multipliers used to split a time-of-day value.
    MS_PER_SECOND = 1000
    MS_PER_MINUTE = MS_PER_SECOND * 60
    MS_PER_HOUR = MS_PER_MINUTE * 60

    # Base class for every error raised by this DBF reader.
    class DBFError < StandardError; end
    class InvalidColumnName < DBFError; end
    class InvalidColumnLength < DBFError; end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
class Kladr
  module DBF
    # One row of a table. The constructor reads every column value from the
    # table's data stream at its current position and stores the results in
    # +attributes+, keyed by column name.
    class Record
      attr_reader :attributes

      # Tables (by object_id) whose accessors have already been defined.
      # A single global flag would define accessors for the first table only.
      @accessor_tables = {}

      class << self
        attr_reader :accessor_tables
      end

      # table must respond to data, memo, columns and options (plus the memo
      # attributes when memo columns are present).
      def initialize(table)
        @table, @data, @memo = table, table.data, table.memo
        @attributes = {}
        initialize_values(table.columns)
        define_accessors
        self
      end

      private

      # Defines one reader method per column (underscored column name) when
      # the table was opened with :accessors. Done once per table; names the
      # record already responds to are left alone.
      def define_accessors
        return unless @table.options[:accessors]

        registry = Record.accessor_tables
        return if registry[@table.object_id]

        @table.columns.each do |column|
          accessor = underscore(column.name)
          next if respond_to?(accessor)

          # Resolve the column through the record's own table so an accessor
          # shared by several tables returns the right attribute for each.
          Record.send(:define_method, accessor) do
            own = @table.columns.detect { |c| underscore(c.name) == accessor }
            own && @attributes[own.name]
          end
        end
        registry[@table.object_id] = true
      end

      # Reads the columns in order and fills @attributes.
      def initialize_values(columns)
        columns.each do |column|
          @attributes[column.name] = read_value(column)
        end
      end

      # Reads and converts one column value according to its type code.
      def read_value(column)
        case column.type
        when 'N' # number
          raw = unpack_string(column)
          column.decimal.zero? ? raw.to_i : raw.to_f
        when 'D' # date
          parse_date(unpack_string(column).strip)
        when 'M' # memo
          read_memo(unpack_string(column).to_i)
        when 'L' # logical
          unpack_string(column) =~ /^(y|t)$/i ? true : false
        when 'I' # integer
          unpack_integer(column)
        when 'T' # datetime
          unpack_datetime(column)
        else
          unpack_string(column).strip
        end
      end

      # Returns a Time (or a Date when Time.gm rejects the values) for a
      # YYYYMMDD string; nil when the string is empty or is not a date.
      def parse_date(raw)
        return nil if raw.empty?

        match = raw.match(DATE_REGEXP)
        return nil unless match

        parts = match.captures.map { |n| n.to_i }
        begin
          Time.gm(*parts)
        rescue
          Date.new(*parts)
        end
      end

      # Reads the raw bytes of a column; returns a one-element array.
      def unpack_column(column)
        @data.read(column.length).unpack("a#{column.length}")
      end

      # Returns the raw column text. Uses #first rather than Array#to_s,
      # which returns the inspect form (brackets and quotes) on Ruby >= 1.9.
      def unpack_string(column)
        unpack_column(column).first
      end

      # Reads a signed 32-bit little-endian integer.
      def unpack_integer(column)
        value = @data.read(column.length).unpack("V").first
        return value if value.nil?

        # "V" is unsigned; fold the upper half back to negative numbers.
        value >= 0x80000000 ? value - 0x100000000 : value
      end

      # Reads two native 32-bit integers (day number, milliseconds) and
      # builds a DateTime from them.
      def unpack_datetime(column)
        days, milliseconds = @data.read(column.length).unpack('l2')
        hours = milliseconds / MS_PER_HOUR
        minutes = (milliseconds % MS_PER_HOUR) / MS_PER_MINUTE
        seconds = (milliseconds % MS_PER_MINUTE) / MS_PER_SECOND
        DateTime.jd(days, hours, minutes, seconds)
      end

      # Returns the memo text starting at start_block, or nil when there is
      # no memo (block <= 0, unknown block size, non-text memo, or a table
      # version without a memo reader).
      def read_memo(start_block)
        return nil if start_block <= 0 || @table.memo_block_size.nil?

        @memo.seek(start_block * @table.memo_block_size)
        if @table.memo_file_format == :fpt
          read_fpt_memo
        else
          case @table.version
          when "83" # dbase iii
            read_dbase3_memo
          when "8b" # dbase iv
            read_dbase4_memo
          end
        end
      end

      # FPT memo: a block header (type, size) followed by the content.
      def read_fpt_memo
        block_size = @table.memo_block_size
        block = @memo.read(block_size)
        return nil if block.nil?

        # "a*" takes the rest of the block; a fixed width would truncate
        # memos whenever the block size differs from 64.
        memo_type, memo_size, memo_string = block.unpack("NNa*")

        # skip the memo if it isn't text
        return nil unless memo_type == 1

        content_size = block_size - FPT_BLOCK_HEADER_SIZE
        if memo_size > content_size
          memo_string << @memo.read(memo_size - content_size).to_s
        else
          memo_string = memo_string[0, memo_size]
        end
        memo_string
      end

      # dBase III memo: 512-byte blocks are appended until a short block
      # (after stripping) or the end of the file is reached.
      def read_dbase3_memo
        memo_string = ""
        loop do
          block = @memo.read(512)
          break if block.nil?

          memo_string << block
          break if block.strip.size < 512
        end
        memo_string
      end

      # dBase IV memo: an 8-byte header whose second integer is the number
      # of bytes to read.
      def read_dbase4_memo
        _memo_type, memo_size = @memo.read(8).unpack("LL")
        @memo.read(memo_size)
      end

      # Converts a CamelCase or dashed word to lower-case snake_case.
      def underscore(camel_cased_word)
        camel_cased_word.to_s.gsub(/::/, '/').
          gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
          gsub(/([a-z\d])([A-Z])/,'\1_\2').
          tr("-", "_").
          downcase
      end
    end
  end
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
class Kladr
  module DBF

    # Reader for one DBF table file and its optional memo file.
    #
    # Example:
    #   table = Kladr::DBF::Table.new 'data.dbf', :in_memory => false
    #   table.record(0).attributes
    class Table
      # The total number of columns
      attr_reader :column_count

      # An array of DBF::Column records
      attr_reader :columns

      # Internal dBase version number (two hex digits, e.g. "03")
      attr_reader :version

      # Last updated datetime
      attr_reader :last_updated

      # Either :fpt or :dbt
      attr_reader :memo_file_format

      # The block size for memo records
      attr_reader :memo_block_size

      # The options that were used when initializing the table. This is a Hash.
      attr_reader :options

      # The open data file and the open memo file (nil when there is none)
      attr_reader :data
      attr_reader :memo

      # Opens filename (and a memo file with the same base name, if present)
      # and reads the header and column descriptors.
      #
      # Options:
      #   :in_memory - cache all records on first access (default true)
      #   :accessors - define reader methods on records (default true)
      def initialize(filename, options = {})
        @options = {:in_memory => true, :accessors => true}.merge(options)

        @in_memory = @options[:in_memory]
        @accessors = @options[:accessors]
        @data = File.open(filename, 'rb')
        @memo = open_memo(filename)
        reload!
      end

      # Re-reads the header, memo header and column descriptors and drops
      # any cached records.
      def reload!
        @records = nil
        get_header_info
        get_memo_header_info if @memo
        get_column_descriptors
        build_db_index
      end

      # Closes the data file and the memo file. Safe to call repeatedly.
      def close
        @data.close unless @data.closed?
        @memo.close if @memo && !@memo.closed?
        nil
      end

      # Returns true if there is a corresponding memo file
      def has_memo_file?
        @memo ? true : false
      end

      # The number of record slots in the file, as declared by the header.
      # Rows marked as deleted are counted too.
      def record_count
        @db_index.size
      end

      # Returns an instance of DBF::Column for <b>column_name</b>. <b>column_name</b>
      # can be a symbol or a string.
      def column(column_name)
        @columns.detect {|f| f.name == column_name.to_s}
      end

      # An array of all the records that are not marked for deletion. Cached
      # after the first call when the table was opened with :in_memory.
      def records
        if options[:in_memory]
          @records ||= get_all_records_from_file
        else
          get_all_records_from_file
        end
      end

      alias_method :rows, :records

      # With :in_memory, returns the element at <tt>index</tt> of #records
      # (deleted rows are not part of that array). Otherwise seeks to row
      # <tt>index</tt> in the file and returns nil when it is marked deleted.
      def record(index)
        if options[:in_memory]
          records[index]
        else
          get_record_from_file(index)
        end
      end

      # Find records using a simple ActiveRecord-like syntax.
      #
      # Examples:
      #   table = Kladr::DBF::Table.new 'mydata.dbf'
      #
      #   # Find record number 5
      #   table.find(5)
      #
      #   # Find all records for Keith Morrison
      #   table.find :all, :first_name => "Keith", :last_name => "Morrison"
      #
      #   # Find first record
      #   table.find :first, :first_name => "Keith"
      #
      # The <b>command</b> can be an index, :all, or :first; anything else
      # returns nil. <b>options</b> is an optional hash whose keys are column
      # names; a record matches when every listed value equals the stored
      # value (like "WHERE key1 = 'value1' AND key2 = 'value2'").
      def find(command, options = {})
        case command
        when Integer
          # Direct lookup: do not load and filter the whole table first.
          record(command)
        when :all
          matching_records(options)
        when :first
          matching_records(options).first
        end
      end

      alias_method :row, :record

      # Returns a description of the current database file.
      def version_description
        VERSION_DESCRIPTIONS[version]
      end

      # Returns a database schema in the portable ActiveRecord::Schema format,
      # or writes it to <tt>path</tt> when one is given. The type of each
      # column comes from Column#schema_definition.
      #
      # Example:
      #   create_table "mydata" do |t|
      #     t.column :name, :string, :limit => 30
      #     t.column :last_update, :datetime
      #     t.column :is_active, :boolean
      #     t.column :age, :integer
      #     t.column :notes, :text
      #   end
      def schema(path = nil)
        s = "ActiveRecord::Schema.define do\n"
        s << "  create_table \"#{File.basename(@data.path, ".*")}\" do |t|\n"
        columns.each do |column|
          s << "    t.column #{column.schema_definition}"
        end
        s << "  end\nend"

        if path
          File.open(path, 'w') {|f| f.puts(s)}
        else
          s
        end
      end

      private

      # All records, or only those whose attributes equal every given value.
      def matching_records(options)
        return records if options.empty?

        records.select {|record| all_values_match?(record, options)}
      end

      # Looks for a memo file next to the data file (same name with an
      # fpt/FPT/dbt/DBT extension). Opens the first one found and records
      # its format; returns nil when none exists.
      def open_memo(file)
        base = file.sub(/#{Regexp.escape(File.extname(file))}\z/, '')
        %w(fpt FPT dbt DBT).each do |extension|
          filename = "#{base}.#{extension}"
          if File.exist?(filename)
            @memo_file_format = extension.downcase.to_sym
            return File.open(filename, 'rb')
          end
        end
        nil
      end

      # Consumes the one-byte deletion flag at the current position.
      def deleted_record?
        @data.read(1).unpack('a') == ['*']
      end

      # Reads version, record count, header length and record length.
      def get_header_info
        @data.rewind
        @version, @record_count, @header_length, @record_length = @data.read(DBF_HEADER_SIZE).unpack('H2 x3 V v2')
        @column_count = (@header_length - DBF_HEADER_SIZE + 1) / DBF_HEADER_SIZE
      end

      # Reads the 32-byte column descriptors that follow the header;
      # descriptors with a zero length are skipped.
      def get_column_descriptors
        @columns = []
        @column_count.times do
          name, type, length, decimal = @data.read(32).unpack('a10 x a x4 C2')
          if length > 0
            @columns << Column.new(name.strip, type, length, decimal)
          end
        end
        # Reset the column count
        @column_count = @columns.size

        @columns
      end

      # Reads the memo block size (FPT) or assumes 512-byte blocks.
      def get_memo_header_info
        @memo.rewind
        if @memo_file_format == :fpt
          @memo_next_available_block, @memo_block_size = @memo.read(FPT_HEADER_SIZE).unpack('N x2 n')
        else
          @memo_block_size = 512
          @memo_next_available_block = File.size(@memo.path) / @memo_block_size
        end
      end

      # Positions the data file at offset bytes past the header.
      def seek(offset)
        @data.seek(@header_length + offset)
      end

      def seek_to_record(index)
        seek(index * @record_length)
      end

      # Returns the record at <tt>index</tt> by seeking to it in the data
      # file, or nil when the row is marked deleted.
      def get_record_from_file(index)
        seek_to_record(index)
        deleted_record? ? nil : Record.new(self)
      end

      # Reads every row that is not marked deleted.
      def get_all_records_from_file
        all_records = []
        0.upto(@record_count - 1) do |n|
          seek_to_record(n)
          all_records << Record.new(self) unless deleted_record?
        end
        all_records
      end

      # Indexes every row number without touching the file, so opening a
      # large table does not scan it. Deleted rows are therefore not
      # detected here and @deleted_records stays empty.
      def build_db_index
        @deleted_records = []
        @db_index = (0...@record_count).to_a
      end

      def all_values_match?(record, options)
        options.all? {|key, value| record.attributes[key.to_s] == value}
      end
    end

  end
end
|
data/lib/kladr.rb
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'dbf/dbf'
|
2
|
+
require 'iconv'
|
3
|
+
require 'active_support'
|
4
|
+
require 'active_record'
|
5
|
+
$KCODE = 'u'
|
6
|
+
|
7
|
+
class Kladr
|
8
|
+
|
9
|
+
# Creates the "streets" table (name, street_code, abbrev) and adds an index
# on street_code and on name.
def self.exec_streets_schema
  migration = ActiveRecord::Migration
  migration.create_table("streets") do |streets|
    streets.column "name", :string, :limit => 40
    streets.column "street_code", :integer
    streets.column "abbrev", :string, :limit => 10
  end
  migration.add_index :streets, :street_code
  migration.add_index :streets, :name
end
|
18
|
+
|
19
|
+
|
20
|
+
# Row of the "streets" table; owns the houses that reference it.
class Street < ActiveRecord::Base
  has_many :houses
end

# Row of the "houses" table; linked to its street.
class House < ActiveRecord::Base
  belongs_to :street
end
|
27
|
+
|
28
|
+
# Ensures +file+ exists by decompressing "<file>.gz" next to it.
# Does nothing when +file+ is already present or no archive exists.
# Returns nil.
def self.file_unpack(file)
  # Loaded here so the method is self-contained; zlib is only needed the
  # first time a table is unpacked.
  require 'zlib'

  return if File.exist?(file)
  archive = "#{file}.gz"
  return unless File.exist?(archive)

  # Decompress in-process instead of through a shell command, so paths with
  # spaces or shell metacharacters are handled correctly.
  # Write to a temporary name first: an interrupted run must not leave a
  # truncated file that later calls would take for a complete one.
  partial = "#{file}.part"
  Zlib::GzipReader.open(archive) do |gz|
    File.open(partial, 'wb') do |out|
      while (chunk = gz.read(1024 * 1024)) && !chunk.empty?
        out.write(chunk)
      end
    end
  end
  File.rename(partial, file)
  nil
end
|
33
|
+
|
34
|
+
# Converts a CP866-encoded string to UTF-8 and returns the new string.
def self.recode(string)
  if string.respond_to?(:encode)
    # Iconv is not available on Ruby 2.0 and later; String#encode does the
    # same conversion. dup keeps the caller's string untouched.
    string.dup.force_encoding("CP866").encode("UTF-8")
  else
    Iconv.iconv("UTF-8", "CP866", string).first
  end
end
|
37
|
+
|
38
|
+
# Imports streets from a STREET.DBF file into the streets table.
#
# Unpacks the file if needed, creates the table (errors from the schema step
# are ignored), then creates one Street per record whose code starts with 77
# and whose two characters at offset 15 are zero. Prints every created
# street and a timing summary.
def self.street_import(file = File.dirname(__FILE__)+"/../BASE/STREET.DBF")
  started_at = Time.now
  file_unpack(file)
  table = Kladr::DBF::Table.new(file, :in_memory => false)
  table.columns.each { |column| column.name.replace(column.name.downcase) }

  begin
    exec_streets_schema
  rescue
    false
  end

  known_columns = Street.columns.map(&:name)

  puts "Table created, importing #{table.record_count} records"
  imported = 0
  last_index = table.record_count - 1
  0.upto(last_index) do |i|
    record = table.record(i)
    next unless record

    fields = record.attributes
    city_code = fields["code"][0,2].to_i
    street_code = fields["code"][11, 4].to_i
    actuality_code = fields["code"][15,2].to_i
    next if city_code != 77 || actuality_code != 0

    candidate = {
      :street_code => street_code,
      :name => recode(fields["name"]),
      :abbrev => recode(fields["socr"])
    }
    # Keep only the attributes the streets table actually has.
    street = Street.create(candidate.reject { |field, _value| !known_columns.include?(field.to_s) })
    puts format("%4d %s %s", street.street_code, street.abbrev, street.name)
    imported += 1
    puts "Starting Moscow on #{i} record" if imported == 1
  end
  finished_at = Time.now

  puts "It took #{finished_at - started_at} seconds to import #{imported} records. #{Time.now - finished_at} to build index."
end
|
71
|
+
|
72
|
+
# Creates the "houses" table and a composite index on
# (street_code, house_code).
# NOTE(review): houses_import stores recoded text in "building", which is
# declared as an integer column here - confirm the intended type.
def self.exec_houses_schema
  migration = ActiveRecord::Migration
  migration.create_table("houses") do |houses|
    houses.column "number", :string, :limit => 10
    houses.column "street_code", :integer
    houses.column "abbrev", :string, :limit => 10
    houses.column "building", :integer
    houses.column "index", :integer
    houses.column "house_code", :integer
    houses.column "street_id", :integer
  end
  migration.add_index :houses, [:street_code, :house_code]
end
|
84
|
+
|
85
|
+
# Imports houses from a DOMA.DBF file into the houses table.
#
# Unpacks the file if needed, creates the table (errors from the schema step
# are ignored), then for every record whose code starts with 77 and has a
# non-zero street code creates one House per number listed in the record's
# name field. Prints a timing summary at the end.
def self.houses_import(file = File.dirname(__FILE__)+"/../BASE/DOMA.DBF")
  start_time = Time.now
  file_unpack(file)
  table = Kladr::DBF::Table.new(file, :in_memory => false)
  table.columns.each {|c| c.name.replace(c.name.downcase) }

  begin
    exec_houses_schema
  rescue
    false
  end

  puts "Table created, importing #{table.record_count} records"
  count = 0
  announced = false
  table_columns = House.columns.map(&:name)

  # Many house rows share a street: look each street code up only once.
  street_ids = Hash.new do |cache, code|
    street = Street.find_by_street_code(code)
    cache[code] = street && street.id
  end

  0.upto(table.record_count-1) do |i|
    record = table.record(i)
    next unless record

    code = record.attributes["code"]
    city_code = code[0,2].to_i
    street_code = code[11, 4].to_i
    house_code = code[15, 4].to_i
    next unless city_code == 77
    next if street_code == 0

    attributes = {
      :street_code => street_code, :house_code => house_code, :abbrev => recode(record.attributes["socr"]).chars.downcase.to_s,
      :building => recode(record.attributes["korp"]), :index => record.attributes["index"].to_i
    }.reject {|field, value| !table_columns.include?(field.to_s)}
    attributes[:numbers] = recode(record.attributes["name"])
    street_id = street_ids[street_code]
    attributes[:street_id] = street_id if street_id
    houses = create_houses(attributes)
    count += houses.length

    # count grows by a whole batch per record, so testing for count == 1
    # could skip this notice or print it more than once.
    if !announced && count > 0
      puts "Starting Moscow on #{i} record"
      announced = true
    end
  end
  puts "It took #{Time.now - start_time} seconds to import #{count} records."
end
|
121
|
+
|
122
|
+
# Expands a comma-separated list of house numbers into an array of strings.
#
# A part containing a numeric range "A-B" is expanded to every number from
# A to B; when the part also contains "(" only every second number is taken,
# starting at A. Any other part is returned unchanged, including a part with
# a hyphen that is not a numeric range (e.g. "5-А").
# Returns [] when numbers is nil.
def self.extract_numbers(numbers)
  return [] unless numbers

  numbers.split(",").map do |part|
    range = /(\d+)-(\d+)/.match(part)
    next part unless range

    start_number, end_number = range.captures.map { |digits| digits.to_i }
    step = part.index("(") ? 2 : 1
    res = []
    (start_number..end_number).step(step) {|i| res << i.to_s}
    res
  end.flatten
end
|
136
|
+
|
137
|
+
# Creates one House per number listed under attributes[:numbers] (the key is
# removed from the given hash) and prints each house with its street name,
# or "-" when it has no street. Returns the array of expanded numbers.
def self.create_houses(attributes)
  extract_numbers(attributes.delete(:numbers)).each do |number|
    house = House.create(attributes.merge(:number => number))
    street_name = (house.street && house.street.name) || "-"
    puts format("%30s %4s", street_name, house.number)
  end
end
|
144
|
+
|
145
|
+
# Runs the full import with the default files: streets first, then houses
# (houses_import looks up the streets created by the first step).
def self.import
  street_import
  houses_import
end
|
149
|
+
|
150
|
+
end
|
data/test.rb
ADDED
data/test/houses_test.rb
ADDED
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kladr
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.1"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Max Lapshin
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-04-10 00:00:00 +04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: ""
|
17
|
+
email: max@maxidoors.ru
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- BASE
|
26
|
+
- BASE/ALTNAMES.DBF.gz
|
27
|
+
- BASE/DOMA.DBF
|
28
|
+
- BASE/DOMA.DBF.gz
|
29
|
+
- BASE/FLAT.DBF.gz
|
30
|
+
- BASE/KLADR.DBF.gz
|
31
|
+
- BASE/SOCRBASE.DBF.gz
|
32
|
+
- BASE/STREET.DBF
|
33
|
+
- BASE/STREET.DBF.gz
|
34
|
+
- init.rb
|
35
|
+
- kladr.sqlite3
|
36
|
+
- lib
|
37
|
+
- lib/dbf
|
38
|
+
- lib/dbf/dbf
|
39
|
+
- lib/dbf/dbf/column.rb
|
40
|
+
- lib/dbf/dbf/globals.rb
|
41
|
+
- lib/dbf/dbf/record.rb
|
42
|
+
- lib/dbf/dbf/table.rb
|
43
|
+
- lib/dbf/dbf.rb
|
44
|
+
- lib/kladr.rb
|
45
|
+
- pkg
|
46
|
+
- Rakefile
|
47
|
+
- README
|
48
|
+
- README.ru
|
49
|
+
- test
|
50
|
+
- test/houses_test.rb
|
51
|
+
- test/test_helper.rb
|
52
|
+
- test.rb
|
53
|
+
has_rdoc: false
|
54
|
+
homepage:
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project: kladr
|
75
|
+
rubygems_version: 1.1.0
|
76
|
+
signing_key:
|
77
|
+
specification_version: 2
|
78
|
+
summary: Importer of russian classificator of addresses
|
79
|
+
test_files: []
|
80
|
+
|