RubyGems - kladr - Versions diffs - 0.1 - Mend

kladr 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

data/BASE/ALTNAMES.DBF.gz ADDED Viewed

Binary file

data/BASE/DOMA.DBF ADDED Viewed

Binary file

data/BASE/DOMA.DBF.gz ADDED Viewed

Binary file

data/BASE/FLAT.DBF.gz ADDED Viewed

Binary file

data/BASE/KLADR.DBF.gz ADDED Viewed

Binary file

data/BASE/SOCRBASE.DBF.gz ADDED Viewed

Binary file

data/BASE/STREET.DBF ADDED Viewed

Binary file

data/BASE/STREET.DBF.gz ADDED Viewed

Binary file

data/README ADDED Viewed

File without changes

data/README.ru ADDED Viewed

@@ -0,0 +1,9 @@
+Классификатор адресов России (КЛАДР) живет по адресу http://www.gnivc.ru/downloads/kladr.aspx
+Поскольку сами базы там находятся в редком формате ARJ и весят совсем немного, то они
+прилагаются к этому проекту в сжатом виде в каталоге BASE.
+Библиотека DBF, которая лежит на http://rubyforge.org/projects/dbf, в исходном виде непригодна к импорту больших DBF,
+так как при открытии файла она вычитывает всю таблицу. Это неприемлемо, поэтому исправленная библиотека
+прилагается.

data/Rakefile ADDED Viewed

@@ -0,0 +1,32 @@
+require 'rubygems'
+require 'rake'
+require 'rake/testtask'
+require 'rake/gempackagetask'
+spec = Gem::Specification.new do |s|
+  s.name = 'kladr'
+  s.version = '0.1'
+  s.summary = 'Importer of russian classificator of addresses'
+#  s.autorequire = 'attacheable'
+  s.author  = "Max Lapshin"
+  s.email   = "max@maxidoors.ru"
+  s.description = ""
+  s.rubyforge_project = "kladr"
+  s.has_rdoc          = false
+  s.files = FileList["**/**"].exclude(".git").to_a
+end
+Rake::GemPackageTask.new(spec) do |package|
+  package.gem_spec = spec
+end
+task :default => [ :test ]
+desc "Run all tests"
+Rake::TestTask.new("test") { |t|
+  t.libs << "test"
+  t.pattern = 'test/*_test.rb'
+  t.verbose = true
+}

data/init.rb ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ $:.unshift(File.dirname(__FILE__)+'/lib')
2	+ require 'kladr'

data/kladr.sqlite3 ADDED Viewed

Binary file

data/lib/dbf/dbf.rb ADDED Viewed

@@ -0,0 +1,6 @@
+require 'date'
+require File.dirname(__FILE__)+'/dbf/globals'
+require File.dirname(__FILE__)+'/dbf/record'
+require File.dirname(__FILE__)+'/dbf/column'
+require File.dirname(__FILE__)+'/dbf/table'

data/lib/dbf/dbf/column.rb ADDED Viewed

@@ -0,0 +1,54 @@
+class Kladr
+module DBF
+  class ColumnLengthError < DBFError; end
+  class Column
+    attr_reader :name, :type, :length, :decimal
+    def initialize(name, type, length, decimal)
+      raise ColumnLengthError, "field length must be greater than 0" unless length > 0
+      @name, @type, @length, @decimal = strip_non_ascii_chars(name), type, length, decimal
+    end
+    def schema_definition
+      "\"#{underscore(name)}\", " +
+      case type
+      when "N" # number
+        if decimal > 0
+          ":float"
+        else
+          ":integer"
+        end
+      when "D" # date
+        ":datetime"
+      when "L" # boolean
+        ":boolean"
+      when "M" # memo
+        ":text"
+      else
+        ":string, :limit => #{length}"
+      end +
+      "\n"
+    end
+    private
+    def underscore(camel_cased_word)
+      camel_cased_word.to_s.gsub(/::/, '/').
+        gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+        gsub(/([a-z\d])([A-Z])/,'\1_\2').
+        tr("-", "_").
+        downcase
+    end
+    def strip_non_ascii_chars(s)
+      clean = ''
+      s.each_byte do |char|
+        clean << char if char > 31 && char < 128
+      end
+      clean
+    end
+  end
+end
+end

data/lib/dbf/dbf/globals.rb ADDED Viewed

@@ -0,0 +1,30 @@
+class Kladr
+  module DBF
+    DBF_HEADER_SIZE = 32
+    FPT_HEADER_SIZE = 512
+    FPT_BLOCK_HEADER_SIZE = 8
+    DATE_REGEXP = /([\d]{4})([\d]{2})([\d]{2})/
+    VERSION_DESCRIPTIONS = {
+      "02" => "FoxBase",
+      "03" => "dBase III without memo file",
+      "04" => "dBase IV without memo file",
+      "05" => "dBase V without memo file",
+      "30" => "Visual FoxPro",
+      "31" => "Visual FoxPro with AutoIncrement field",
+      "7b" => "dBase IV with memo file",
+      "83" => "dBase III with memo file",
+      "8b" => "dBase IV with memo file",
+      "8e" => "dBase IV with SQL table",
+      "f5" => "FoxPro with memo file",
+      "fb" => "FoxPro without memo file"
+    }
+    MS_PER_SECOND = 1000
+    MS_PER_MINUTE = MS_PER_SECOND * 60
+    MS_PER_HOUR = MS_PER_MINUTE * 60
+    class DBFError < StandardError; end
+    class InvalidColumnName < DBFError; end
+    class InvalidColumnLength < DBFError; end
+  end
+end

data/lib/dbf/dbf/record.rb ADDED Viewed

@@ -0,0 +1,121 @@
+class Kladr
+  module DBF
+    class Record
+      attr_reader :attributes
+      @@accessors_defined = false
+      def initialize(table)
+        @table, @data, @memo = table, table.data, table.memo
+        @attributes = {}
+        initialize_values(table.columns)
+        define_accessors
+        self
+      end
+      private
+      def define_accessors
+        return if @@accessors_defined
+        @table.columns.each do |column|
+          underscored_column_name = underscore(column.name)
+          if @table.options[:accessors] && !respond_to?(underscored_column_name)
+            self.class.send :define_method, underscored_column_name do
+              @attributes[column.name]
+            end
+            @@accessors_defined = true
+          end
+        end
+      end
+      def initialize_values(columns)
+        columns.each do |column|
+          @attributes[column.name] = case column.type
+          when 'N' # number
+            column.decimal.zero? ? unpack_string(column).to_i : unpack_string(column).to_f
+          when 'D' # date
+            raw = unpack_string(column).strip
+            unless raw.empty?
+              parts = raw.match(DATE_REGEXP).captures.map {|n| n.to_i}
+              begin
+                Time.gm(*parts)
+              rescue
+                Date.new(*parts)
+              end
+            end
+          when 'M' # memo
+            starting_block = unpack_string(column).to_i
+            read_memo(starting_block)
+          when 'L' # logical
+            unpack_string(column) =~ /^(y|t)$/i ? true : false
+          when 'I' # integer
+            unpack_integer(column)
+          when 'T' # datetime
+            unpack_datetime(column)
+          else
+            unpack_string(column).strip
+          end
+        end
+      end
+      def unpack_column(column)
+        @data.read(column.length).unpack("a#{column.length}")
+      end
+      def unpack_string(column)
+        unpack_column(column).to_s
+      end
+      def unpack_integer(column)
+        @data.read(column.length).unpack("v").first
+      end
+      def unpack_datetime(column)
+        days, milliseconds = @data.read(column.length).unpack('l2')
+        hours = (milliseconds / MS_PER_HOUR).to_i
+        minutes = ((milliseconds - (hours * MS_PER_HOUR)) / MS_PER_MINUTE).to_i
+        seconds = ((milliseconds - (hours * MS_PER_HOUR) - (minutes * MS_PER_MINUTE)) / MS_PER_SECOND).to_i
+        DateTime.jd(days, hours, minutes, seconds)
+      end
+      def read_memo(start_block)
+        return nil if start_block <= 0 || @table.memo_block_size.nil?
+        @memo.seek(start_block * @table.memo_block_size)
+        if @table.memo_file_format == :fpt
+          memo_type, memo_size, memo_string = @memo.read(@table.memo_block_size).unpack("NNa56")
+          # skip the memo if it isn't text
+          return nil unless memo_type == 1
+          memo_block_content_size = @table.memo_block_size - FPT_BLOCK_HEADER_SIZE
+          if memo_size > memo_block_content_size
+            memo_string << @memo.read(memo_size - @table.memo_block_size + FPT_BLOCK_HEADER_SIZE)
+          elsif memo_size > 0 and memo_size < memo_block_content_size
+            memo_string = memo_string[0, memo_size]
+          end
+        else
+          case @table.version
+          when "83" # dbase iii
+            memo_string = ""
+            loop do
+              memo_string << block = @memo.read(512)
+              break if block.strip.size < 512
+            end
+          when "8b" # dbase iv
+            memo_type, memo_size = @memo.read(8).unpack("LL")
+            memo_string = @memo.read(memo_size)
+          end
+        end
+        memo_string
+      end
+      def underscore(camel_cased_word)
+        camel_cased_word.to_s.gsub(/::/, '/').
+          gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+          gsub(/([a-z\d])([A-Z])/,'\1_\2').
+          tr("-", "_").
+          downcase
+      end
+    end
+  end
+end

data/lib/dbf/dbf/table.rb ADDED Viewed

@@ -0,0 +1,253 @@
+class Kladr
+  module DBF
+    class Table
+      # The total number of columns (columns)
+      attr_reader :column_count
+      # An array of DBF::Column records
+      attr_reader :columns
+      # Internal dBase version number
+      attr_reader :version
+      # Last updated datetime
+      attr_reader :last_updated
+      # Either :fpt or :dpt
+      attr_reader :memo_file_format
+      # The block size for memo records
+      attr_reader :memo_block_size
+      # The options that were used when initializing DBF::Table.  This is a Hash.
+      attr_reader :options
+      attr_reader :data
+      attr_reader :memo
+      # Initialize a new DBF::Reader.
+      # Example:
+      #   reader = DBF::Reader.new 'data.dbf'
+      def initialize(filename, options = {})
+        @options = {:in_memory => true, :accessors => true}.merge(options)
+        @in_memory = @options[:in_memory]
+        @accessors = @options[:accessors]
+        @data = File.open(filename, 'rb')
+        @memo = open_memo(filename)
+        reload!
+      end
+      # Reloads the database and memo files
+      def reload!
+        @records = nil
+        get_header_info
+        get_memo_header_info if @memo
+        get_column_descriptors
+        build_db_index
+      end
+      # Returns true if there is a corresponding memo file
+      def has_memo_file?
+        @memo ? true : false
+      end
+      # The total number of active records.
+      def record_count
+        @db_index.size
+      end
+      # Returns an instance of DBF::Column for <b>column_name</b>.  <b>column_name</b>
+      # can be a symbol or a string.
+      def column(column_name)
+        @columns.detect {|f| f.name == column_name.to_s}
+      end
+      # An array of all the records contained in the database file.  Each record is an instance
+      # of DBF::Record (or nil if the record is marked for deletion).
+      def records
+        if options[:in_memory]
+          @records ||= get_all_records_from_file
+        else
+          get_all_records_from_file
+        end
+      end
+      alias_method :rows, :records
+      # Returns a DBF::Record (or nil if the record has been marked for deletion) for the record at <tt>index</tt>.
+      def record(index)
+        if options[:in_memory]
+          records[index]
+        else
+          get_record_from_file(index)
+        end
+      end
+      # Find records using a simple ActiveRecord-like syntax.
+      #
+      # Examples:
+      #   reader = DBF::Reader.new 'mydata.dbf'
+      #
+      #   # Find record number 5
+      #   reader.find(5)
+      #
+      #   # Find all records for Keith Morrison
+      #   reader.find :all, :first_name => "Keith", :last_name => "Morrison"
+      #
+      #   # Find first record
+      #   reader.find :first, :first_name => "Keith"
+      #
+      # The <b>command</b> can be an id, :all, or :first.
+      # <b>options</b> is optional and, if specified, should be a hash where the keys correspond
+      # to column names in the database.  The values will be matched exactly with the value
+      # in the database.  If you specify more than one key, all values must match in order
+      # for the record to be returned.  The equivalent SQL would be "WHERE key1 = 'value1'
+      # AND key2 = 'value2'".
+      def find(command, options = {})
+        results = options.empty? ? records : records.select {|record| all_values_match?(record, options)}
+        case command
+        when Fixnum
+          record(command)
+        when :all
+          results
+        when :first
+          results.first
+        end
+      end
+      alias_method :row, :record
+      # Returns a description of the current database file.
+      def version_description
+        VERSION_DESCRIPTIONS[version]
+      end
+      # Returns a database schema in the portable ActiveRecord::Schema format.
+      #
+      # xBase data types are converted to generic types as follows:
+      # - Number columns are converted to :integer if there are no decimals, otherwise
+      #   they are converted to :float
+      # - Date columns are converted to :datetime
+      # - Logical columns are converted to :boolean
+      # - Memo columns are converted to :text
+      # - Character columns are converted to :string and the :limit option is set
+      #   to the length of the character column
+      #
+      # Example:
+      #   create_table "mydata" do |t|
+      #     t.column :name, :string, :limit => 30
+      #     t.column :last_update, :datetime
+      #     t.column :is_active, :boolean
+      #     t.column :age, :integer
+      #     t.column :notes, :text
+      #   end
+      def schema(path = nil)
+        s = "ActiveRecord::Schema.define do\n"
+        s << "  create_table \"#{File.basename(@data.path, ".*")}\" do |t|\n"
+        columns.each do |column|
+          s << "    t.column #{column.schema_definition}"
+        end
+        s << "  end\nend"
+        if path
+          File.open(path, 'w') {|f| f.puts(s)}
+        else
+          s
+        end
+      end
+      private
+        def open_memo(file)
+          %w(fpt FPT dbt DBT).each do |extension|
+            filename = file.sub(/#{File.extname(file)[1..-1]}$/, extension)
+            if File.exists?(filename)
+              @memo_file_format = extension.downcase.to_sym
+              return File.open(filename, 'rb')
+            end
+          end
+          nil
+        end
+        def deleted_record?
+          @data.read(1).unpack('a') == ['*']
+        end
+        def get_header_info
+          @data.rewind
+          @version, @record_count, @header_length, @record_length = @data.read(DBF_HEADER_SIZE).unpack('H2 x3 V v2')
+          @column_count = (@header_length - DBF_HEADER_SIZE + 1) / DBF_HEADER_SIZE
+        end
+        def get_column_descriptors
+          @columns = []
+          @column_count.times do
+            name, type, length, decimal = @data.read(32).unpack('a10 x a x4 C2')
+            if length > 0
+              @columns << Column.new(name.strip, type, length, decimal)
+            end
+          end
+          # Reset the column count
+          @column_count = @columns.size
+          @columns
+        end
+        def get_memo_header_info
+          @memo.rewind
+          if @memo_file_format == :fpt
+            @memo_next_available_block, @memo_block_size = @memo.read(FPT_HEADER_SIZE).unpack('N x2 n')
+          else
+            @memo_block_size = 512
+            @memo_next_available_block = File.size(@memo.path) / @memo_block_size
+          end
+        end
+        def seek(offset)
+          @data.seek(@header_length + offset)
+        end
+        def seek_to_record(index)
+          seek(index * @record_length)
+        end
+        # Returns the record at <tt>index</tt> by seeking to the record in the
+        # physical database file. See the documentation for the records method for
+        # information on how these two methods differ.
+        def get_record_from_file(index)
+          seek_to_record(index)
+          deleted_record? ? nil : Record.new(self)
+        end
+        def get_all_records_from_file
+          all_records = []
+          0.upto(@record_count - 1) do |n|
+            seek_to_record(n)
+            all_records << DBF::Record.new(self) unless deleted_record?
+          end
+          all_records
+        end
+        def build_db_index
+          @db_index = []
+          @deleted_records = []
+          0.upto(@record_count - 1) do |n|
+            #seek_to_record(n)
+            if false && deleted_record?
+              @deleted_records << n
+            else
+              @db_index << n
+            end
+          end
+        end
+        def all_values_match?(record, options)
+          options.map {|key, value| record.attributes[key.to_s] == value}.all?
+        end
+    end
+  end
+end

data/lib/kladr.rb ADDED Viewed

@@ -0,0 +1,150 @@
+require 'dbf/dbf'
+require 'iconv'
+require 'active_support'
+require 'active_record'
+$KCODE = 'u'
+class Kladr
+  def self.exec_streets_schema
+    ActiveRecord::Migration.create_table "streets" do |t|
+      t.column "name", :string, :limit => 40
+      t.column "street_code", :integer
+      t.column "abbrev", :string, :limit => 10
+    end
+    ActiveRecord::Migration.add_index :streets, :street_code
+    ActiveRecord::Migration.add_index :streets, :name
+  end
+  class Street < ActiveRecord::Base
+    has_many :houses
+  end
+  class House < ActiveRecord::Base
+    belongs_to :street
+  end
+  def self.file_unpack(file)
+    return if File.exists?(file)
+    return unless File.exists?(file+".gz")
+    `gzip -cd #{file}.gz > #{file}`
+  end
+  def self.recode(string)
+    Iconv.iconv("UTF-8", "CP866", string).first
+  end
+  def self.street_import(file = File.dirname(__FILE__)+"/../BASE/STREET.DBF")
+    start_time = Time.now
+    file_unpack(file)
+    table = Kladr::DBF::Table.new(file, :in_memory => false)
+    table.columns.each {|c| c.name.replace(c.name.downcase) }
+    exec_streets_schema rescue false
+    table_columns = Street.columns.map(&:name)
+    puts "Table created, importing #{table.record_count} records"
+    count = 0
+    0.upto(table.record_count-1) do |i|
+      record = table.record(i)
+      next unless record
+      city_code = record.attributes["code"][0,2].to_i
+      street_code = record.attributes["code"][11, 4].to_i
+      actuality_code = record.attributes["code"][15,2].to_i
+      next unless city_code == 77 && actuality_code == 0
+      attributes = {:street_code => street_code, :name => recode(record.attributes["name"]), :abbrev => recode(record.attributes["socr"])}.
+        reject {|field, value| !table_columns.include?(field.to_s)}
+      street = Street.create(attributes)
+      puts ("%4d %s %s" % [street.street_code, street.abbrev, street.name])
+      count += 1
+      if count == 1
+        puts "Starting Moscow on #{i} record"
+      end
+    end
+    import_time = Time.now
+    puts "It took #{import_time - start_time} seconds to import #{count} records. #{Time.now - import_time} to build index."
+  end
+  def self.exec_houses_schema
+    ActiveRecord::Migration.create_table "houses" do |t|
+      t.column "number", :string, :limit => 10
+      t.column "street_code", :integer
+      t.column "abbrev", :string, :limit => 10
+      t.column "building", :integer
+      t.column "index", :integer
+      t.column "house_code", :integer
+      t.column "street_id", :integer
+    end
+    ActiveRecord::Migration.add_index :houses, [:street_code, :house_code]
+  end
+  def self.houses_import(file = File.dirname(__FILE__)+"/../BASE/DOMA.DBF")
+    start_time = Time.now
+    file_unpack(file)
+    table = Kladr::DBF::Table.new(file, :in_memory => false)
+    table.columns.each {|c| c.name.replace(c.name.downcase) }
+    exec_houses_schema rescue false
+    puts "Table created, importing #{table.record_count} records"
+    count = 0
+    table_columns = House.columns.map(&:name)
+    0.upto(table.record_count-1) do |i|
+      record = table.record(i)
+      next unless record
+      city_code = record.attributes["code"][0,2].to_i
+      street_code = record.attributes["code"][11, 4].to_i
+      house_code = record.attributes["code"][15, 4].to_i
+      next unless city_code == 77
+      next if street_code == 0
+      attributes = {
+        :street_code => street_code, :house_code => house_code, :abbrev => recode(record.attributes["socr"]).chars.downcase.to_s,
+        :building => recode(record.attributes["korp"]), :index => record.attributes["index"].to_i
+      }.reject {|field, value| !table_columns.include?(field.to_s)}
+      attributes[:numbers] = recode(record.attributes["name"])
+      street = Street.find_by_street_code(street_code)
+      attributes[:street_id] = street.id if street
+      houses = create_houses(attributes)
+      count += houses.length
+      if count == 1
+        puts "Starting Moscow on #{i} record"
+      end
+    end
+    puts "It took #{Time.now - start_time} seconds to import #{count} records."
+  end
+  def self.extract_numbers(numbers)
+    return [] unless numbers
+    numbers.split(",").map do |part|
+      if part.index("-")
+        start_number, end_number = /(\d+)-(\d+)/.match(part).captures.map(&:to_i)
+        step = part.index("(") ? 2 : 1
+        res = []
+        (start_number..end_number).step(step) {|i| res << i.to_s}
+        res
+      else
+        part
+      end
+    end.flatten
+  end
+  def self.create_houses(attributes)
+    numbers = extract_numbers(attributes.delete(:numbers))
+    numbers.each do |number|
+      house = House.create(attributes.merge(:number => number))
+      puts ("%30s %4s" % [house.street && house.street.name || "-", house.number])
+    end
+  end
+  def self.import
+    street_import
+    houses_import
+  end
+end

data/test.rb ADDED Viewed

@@ -0,0 +1,7 @@
+# Bootstrap script: loads the library from ./lib and connects ActiveRecord to
+# the SQLite database file kladr.sqlite3.
+$:.unshift("lib")
+require 'rubygems'
+require 'active_record'
+require 'kladr'
+# Paths are relative, so this is presumably meant to be run from the project root.
+ActiveRecord::Base.establish_connection(:adapter => "sqlite3", :dbfile => "kladr.sqlite3")

data/test/houses_test.rb ADDED Viewed

@@ -0,0 +1,6 @@
+require File.dirname(__FILE__)+'/test_helper'
+class HousesTest < Test::Unit::TestCase
+  def test_number_extraction
+  end
+end

data/test/test_helper.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require 'test/unit'
+require 'rubygems'
+require 'active_record'
+$KCODE = 'u'
+$:.unshift File.join(File.dirname(__FILE__), '../lib')
+require 'kladr'

metadata ADDED Viewed

@@ -0,0 +1,80 @@
+--- !ruby/object:Gem::Specification
+name: kladr
+version: !ruby/object:Gem::Version
+  version: "0.1"
+platform: ruby
+authors:
+- Max Lapshin
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2008-04-10 00:00:00 +04:00
+default_executable:
+dependencies: []
+description: ""
+email: max@maxidoors.ru
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- BASE
+- BASE/ALTNAMES.DBF.gz
+- BASE/DOMA.DBF
+- BASE/DOMA.DBF.gz
+- BASE/FLAT.DBF.gz
+- BASE/KLADR.DBF.gz
+- BASE/SOCRBASE.DBF.gz
+- BASE/STREET.DBF
+- BASE/STREET.DBF.gz
+- init.rb
+- kladr.sqlite3
+- lib
+- lib/dbf
+- lib/dbf/dbf
+- lib/dbf/dbf/column.rb
+- lib/dbf/dbf/globals.rb
+- lib/dbf/dbf/record.rb
+- lib/dbf/dbf/table.rb
+- lib/dbf/dbf.rb
+- lib/kladr.rb
+- pkg
+- Rakefile
+- README
+- README.ru
+- test
+- test/houses_test.rb
+- test/test_helper.rb
+- test.rb
+has_rdoc: false
+homepage:
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project: kladr
+rubygems_version: 1.1.0
+signing_key:
+specification_version: 2
+summary: Importer of russian classificator of addresses
+test_files: []