bise 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ .rvmrc
5
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in bise.gemspec
gemspec

# Gems needed only to run the spec suite.
group :test do
  gem 'rspec'
end
9
+
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2011 Eugene Kalenkovich
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/README.rdoc ADDED
@@ -0,0 +1,25 @@
1
+ = Bise
2
+ Binary search for presorted text files. Finds the first occurrence of a string
3
+ with value >= provided term.
4
+
5
+ Finds the first string in the ordered file that satisfies term <= str.
6
+
7
+ Takes optional comparison block accepting term and str.
8
+
9
+ Default - { |term, str| term <=> str }
10
+
11
+ == Installation
12
+
13
+ gem install bise
14
+
15
+
16
+ == Usage
17
+ require 'bise'
18
+ # find word in dictionary
19
+ File.open('/usr/share/dict/words'){|f| f.bin_find('bo')}
20
+
21
+ # find ip-to-country record
22
+ `wget "http://software77.net/geo-ip/?DL=1" -O IpToCountry.csv.gz && gunzip IpToCountry.csv.gz`
23
+ require 'ipaddr'
24
+ File.open('../../files/IpToCountry.csv'){|f| f.bin_find(IPAddr.new('72.4.120.124').to_i){|num,str|
25
+ l,r = str.gsub('"','').split(',').first(2).map(&:to_i); r ||= 0; (l..r) === num ? 0 : num < l ? -1 : 1}}
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bise.gemspec ADDED
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "bise/version"
4
+
5
# Packaging description for the bise gem: identity, authorship and the
# file lists, which are taken from whatever git currently tracks.
Gem::Specification.new do |spec|
  spec.name        = "bise"
  spec.version     = Bise::VERSION
  spec.authors     = ["Eugene Kalenkovich"]
  spec.email       = ["rubify@softover.com"]
  spec.homepage    = ""
  spec.summary     = "Binary search in text files"
  spec.description = "Binary search in ordered text files"

  spec.rubyforge_project = "bise"

  # Each listing is the newline-separated output of a git command.
  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  # Executables are registered by bare file name, not by path.
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "rspec"
end
data/gem ADDED
@@ -0,0 +1,84 @@
1
+ module BinarySearch
2
+ def bin_find(term)
3
+ if block_given?
4
+ compare = lambda { |a, b| yield(a, b) }
5
+ else
6
+ compare = lambda { |a, b| a <=> b }
7
+ end
8
+ bin_search(term, 0, stat.size, &compare)
9
+ end
10
+
11
+ private
12
+
13
+ def bin_search(term, start, finish, &block)
14
+ pos = (start + finish + 1) / 2
15
+ seek pos
16
+ gets unless pos == 0
17
+ str = gets
18
+ return str if start >= finish
19
+ return nil unless str
20
+ comp = yield(term, str)
21
+ case comp
22
+ when 1
23
+ bin_search(term, pos, finish, &block)
24
+ when -1
25
+ pos == finish ? str : bin_search(term, start, pos, &block)
26
+ else
27
+ str
28
+ end
29
+ end
30
+ end
31
+
32
# Make bin_find available on every File instance.
File.send(:include, BinarySearch)
35
+
36
+
37
+ #nums=[]
38
+ #out=true
39
+ #max = (4294967295 - 50331648) + 50331647
40
+ #if ARGV[0] == '-test'
41
+ # n=ARGV[1].to_i
42
+ # n.times{ nums << rand( max + 1 )}
43
+ # out=false
44
+ #else
45
+ # ARGV.each do |argv|
46
+ # nums << argv.to_i #((($1.to_i*256)+$2.to_i)*256+$3.to_i)*256+$4.to_i if argv=~/(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/
47
+ # end
48
+ #end
49
+ #if nums.empty?
50
+ # puts "Please enter valid ip(s)"
51
+ # exit
52
+ #end
53
+ #
54
+ #nums.each do |num|
55
+ # ctry='Unknown'
56
+ # res=bin_find('../files/IpToCountry.csv',num) { |search, str|
57
+ # res = nil
58
+ # if str.empty? || str[0,1]!='"'
59
+ # res = 1
60
+ # else
61
+ # l, r = str.gsub('"','').split(',')[0,2].map(&:to_i)
62
+ # if (l..r) === search
63
+ # res = 0
64
+ # else
65
+ # res = search > r ? 1 : -1
66
+ # end
67
+ # res
68
+ # end
69
+ # }.gsub('"','').split(',')
70
+ # ctry=res[4] if (res[0].to_i..res[1].to_i)===num
71
+ # puts ctry if out
72
+ #end
73
+
74
# Comparator for rows of the IpToCountry.csv file.
#
# num - integer being looked up.
# str - one line of the file.
#
# Lines that are empty or do not begin with a double quote compare as 1.
# Otherwise the first two comma-separated fields are read as the integer
# bounds of a range: 0 when num lies inside it, 1 when num is above the
# upper bound, -1 otherwise.
itc_compare = lambda do |num, str|
  if str.empty? || str[0] != '"'
    1
  else
    low, high = str.delete('"').split(',').first(2).map(&:to_i)
    if (low..high) === num
      0
    elsif num > high
      1
    else
      -1
    end
  end
end
80
+
81
# Pack a dotted-quad address given as the first argument into one integer,
# eight bits per octet. num stays nil when the argument does not match.
num = (($1.to_i << 8 | $2.to_i) << 8 | $3.to_i) << 8 | $4.to_i if ARGV[0] =~ /(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/
# Look the raw argument up in the system word list.
puts File.open('/usr/share/dict/words') { |f| f.bin_find(ARGV[0]) }
# Look the packed address up in the IP-to-country table. The closing
# parenthesis of the bin_find call was missing, which was a syntax error.
puts File.open('../files/IpToCountry.csv') { |f| f.bin_find(num, &itc_compare) }
84
+
@@ -0,0 +1,3 @@
1
module Bise
  # Release number of the gem; read by bise.gemspec.
  VERSION = '0.0.1'
end
data/lib/bise.rb ADDED
@@ -0,0 +1,41 @@
1
+ require "bise/version"
2
+
3
module Bise
  ##
  # Finds the first string in the presorted text file that satisfies term <= str.
  # Takes optional comparison block accepting term and str.
  # Default - { |term, str| term <=> str }
  #
  # The comparator receives each candidate line with its line terminator
  # removed. Returns the matching line (also without terminator), or nil when
  # term sorts after every line or the file is empty.

  def bin_find(term, &comp)
    comp ||= lambda { |wanted, line| wanted <=> line }
    bin_search(term, 0, stat.size, &comp)
  end


  private

  # Recursive search between byte offsets +start+ and +finish+.
  #
  # Seeks to the midpoint, discards the (possibly partial) line found there
  # unless the midpoint is the very beginning of the file, and compares the
  # next full line with +term+.
  def bin_search(term, start, finish, &comp)
    pos = (start + finish) / 2
    seek pos
    gets unless pos == 0
    str = gets
    # Use chomp rather than chomp!: chomp! returns nil when there is nothing
    # to strip, so a last line without a trailing newline was being handed
    # to the comparator as nil and then returned as a false match.
    str = str.chomp if str
    return str if start >= finish
    return nil unless str
    case yield(term, str)
    when 1
      # term sorts after this line; the line right behind it may already be
      # the answer.
      next_str = gets
      return nil unless next_str
      next_str = next_str.chomp
      return next_str if yield(term, next_str) != 1
      return bin_search(term, pos, finish, &comp) if start < pos
      start == 0 ? str : next_str
    when -1
      bin_search(term, start, pos, &comp)
    else
      str
    end
  end
end
38
+
39
# Make bin_find available on every File instance.
File.send(:include, Bise)
data/spec/bise_spec.rb ADDED
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+ #rubs
3
+ #ruby
4
+ #ruby's
5
+ #rubying
6
+ #rucksack
7
+
8
describe Bise do
  before(:all) do
    @dict = File.join(File.dirname(__FILE__), 'dict')
  end

  # Runs bin_find on the fixture dictionary, forwarding an optional comparator.
  def lookup(term, &comparator)
    File.open(@dict) { |file| file.bin_find(term, &comparator) }
  end

  # Same lookup, but the term is downcased before each comparison.
  def lookup_downcased(term)
    lookup(term) { |t, s| t.downcase <=> s }
  end

  it 'should find first line for "small" term' do
    lookup('a').should == 'rubs'
    lookup_downcased('A').should == 'rubs'
  end

  it 'should find full word for starting part' do
    lookup('rubyi').should == 'rubying'
    lookup_downcased('RuByI').should == 'rubying'
  end

  it 'should find exact word' do
    lookup('ruby').should == 'ruby'
    lookup_downcased('rUbY').should == 'ruby'
  end

  it 'should find nothing for "big" term' do
    lookup('z').should == nil
    lookup_downcased('Z').should == nil
  end
end
data/spec/dict ADDED
@@ -0,0 +1,5 @@
1
+ rubs
2
+ ruby
3
+ ruby's
4
+ rubying
5
+ rucksack
@@ -0,0 +1,7 @@
1
+ require 'rspec'
2
+ require 'bise'
3
+
4
# Shared RSpec settings: coloured output in the documentation format.
RSpec.configure do |config|
  # The rspec dependency is unpinned and newer RSpec releases dropped
  # color_enabled= in favour of color=, so use whichever setter exists.
  if config.respond_to?(:color=)
    config.color = true
  else
    config.color_enabled = true
  end
  config.formatter = 'documentation'
end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bise
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Eugene Kalenkovich
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-10-28 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &70027100 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70027100
25
+ description: Binary search in ordered text files
26
+ email:
27
+ - rubify@softover.com
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - LICENSE
35
+ - README.rdoc
36
+ - Rakefile
37
+ - bise.gemspec
38
+ - gem
39
+ - lib/bise.rb
40
+ - lib/bise/version.rb
41
+ - spec/bise_spec.rb
42
+ - spec/dict
43
+ - spec/spec_helper.rb
44
+ homepage: ''
45
+ licenses: []
46
+ post_install_message:
47
+ rdoc_options: []
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirements: []
63
+ rubyforge_project: bise
64
+ rubygems_version: 1.8.11
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: Binary search in text files
68
+ test_files: []