file_bsearch 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.rubocop.yml +42 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.ja.md +65 -0
- data/README.md +68 -0
- data/Rakefile +7 -0
- data/file_bsearch.gemspec +25 -0
- data/lib/file_bsearch/common.rb +155 -0
- data/lib/file_bsearch/get_lines.rb +147 -0
- data/lib/file_bsearch/main.rb +29 -0
- data/lib/file_bsearch/version.rb +3 -0
- data/lib/file_bsearch.rb +34 -0
- data/spec/file_bsearch_spec.rb +144 -0
- data/spec/lib/my_spec_helper.rb +32 -0
- data/spec/spec_helper.rb +4 -0
- metadata +122 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 47cd4336ef5b84ecf2abcc8ec596ad43245ae679
|
4
|
+
data.tar.gz: 0fab662f8d7ebda542d9d3e7eca1b53b9f7a69d6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e567fbb1af6fd494739b30d4e101c901ef16c53ba08a2b6bd035c2028b14e1bb593ea36460283fcb8bb7012fa5099b5994d219cf221022694b9ce37f841afc7a
|
7
|
+
data.tar.gz: 880234a6190ee6448db8d9fed3d372bd4e7d1812063d8ed11733062d2bbd0aa0712c03bef56d720facab666740c5d0a9e3f83c402399ecd9035a8d0075c3dfce
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# This configuration was generated by `rubocop --auto-gen-config`
|
2
|
+
# on 2014-09-23 13:01:37 +0900 using RuboCop version 0.26.1.
|
3
|
+
# The point is for the user to remove these configuration records
|
4
|
+
# one by one as the offenses are removed from the code base.
|
5
|
+
# Note that changes in the inspected code, or installation of new
|
6
|
+
# versions of RuboCop, may require this file to be generated again.
|
7
|
+
|
8
|
+
# Configuration parameters: CountComments.
|
9
|
+
Metrics/MethodLength:
|
10
|
+
Max: 15
|
11
|
+
|
12
|
+
Style/Documentation:
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
Style/EmptyLinesAroundBody:
|
16
|
+
Enabled: false
|
17
|
+
|
18
|
+
# Configuration parameters: SupportedStyles.
|
19
|
+
Style/HashSyntax:
|
20
|
+
EnforcedStyle: hash_rockets
|
21
|
+
|
22
|
+
# Configuration parameters: EnforcedStyle, MinBodyLength, SupportedStyles.
|
23
|
+
Style/Next:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
# Configuration parameters: PreferredDelimiters.
|
27
|
+
Style/PercentLiteralDelimiters:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Style/RegexpLiteral:
|
31
|
+
MaxSlashes: 0
|
32
|
+
|
33
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
34
|
+
Style/StringLiterals:
|
35
|
+
Enabled: false
|
36
|
+
|
37
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
38
|
+
Style/TrailingBlankLines:
|
39
|
+
Enabled: false
|
40
|
+
|
41
|
+
Style/UnneededPercentQ:
|
42
|
+
Enabled: false
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 indeep-xyz
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.ja.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
FileBsearch
|
2
|
+
====
|
3
|
+
|
4
|
+
Ruby 用のライブラリです。
|
5
|
+
|
6
|
+
ソート済みのテキストファイルに対してバイナリサーチ (二分探索) をおこないます。巨大なファイルに対して効果的です。
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
```sh
|
11
|
+
gem install file_bsearch
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
### 指定文字列の行が、ファイル内に存在するか
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
FileBsearch.exist?(path, string)
|
20
|
+
|
21
|
+
File.bsearch?(path, string)
|
22
|
+
|
23
|
+
file = open(path)
|
24
|
+
file.bsearch?(string)
|
25
|
+
```
|
26
|
+
|
27
|
+
存在する場合は true 、存在しない場合は false が返ります。
|
28
|
+
|
29
|
+
### 指定文字列の行が、ファイル内のどの位置にあるか
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
FileBsearch.index(path, string)
|
33
|
+
|
34
|
+
File.bsearch(path, string)
|
35
|
+
|
36
|
+
file = open(path)
|
37
|
+
file.bsearch(string)
|
38
|
+
```
|
39
|
+
|
40
|
+
存在する場合はその行の開始位置、存在しない場合は false が返ります。
|
41
|
+
|
42
|
+
### 指定文字列から始まる行の取得
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
FileBsearch.get_lines(path, prefix)
|
46
|
+
|
47
|
+
File.bsearch_lines(path, prefix)
|
48
|
+
|
49
|
+
file = open(path)
|
50
|
+
file.bsearch_lines(prefix)
|
51
|
+
```
|
52
|
+
|
53
|
+
存在する場合はそれらの行の文字列を含んだ配列、存在しない場合は空の配列が返ります。
|
54
|
+
|
55
|
+
## Contributing
|
56
|
+
|
57
|
+
1. Fork it ( https://github.com/indeep-xyz/ruby-file-bsearch/fork )
|
58
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
59
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
60
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
61
|
+
5. Create a new Pull Request
|
62
|
+
|
63
|
+
## Author
|
64
|
+
|
65
|
+
[indeep-xyz](http://indeep.xyz/)
|
data/README.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
FileBsearch
|
2
|
+
====
|
3
|
+
|
4
|
+
A library for Ruby.
|
5
|
+
|
6
|
+
Performs a binary search on a sorted text file. It is especially effective on large files.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
```sh
|
11
|
+
gem install file_bsearch
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
### Check whether a line identical to the given string exists
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
FileBsearch.exist?(path, string)
|
20
|
+
|
21
|
+
File.bsearch?(path, string)
|
22
|
+
|
23
|
+
file = open(path)
|
24
|
+
file.bsearch?(string)
|
25
|
+
```
|
26
|
+
|
27
|
+
Returns `true` if such a line exists.
|
28
|
+
Returns `false` if it does not exist.
|
29
|
+
|
30
|
+
### Find the position in the file of the line identical to the given string
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
FileBsearch.index(path, string)
|
34
|
+
|
35
|
+
File.bsearch(path, string)
|
36
|
+
|
37
|
+
file = open(path)
|
38
|
+
file.bsearch(string)
|
39
|
+
```
|
40
|
+
|
41
|
+
If the line exists, returns an Integer: the position in the file where the matched line starts.
|
42
|
+
If it does not exist, returns `false`.
|
43
|
+
|
44
|
+
### Get the lines that start with the given prefix
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
FileBsearch.get_lines(path, prefix)
|
48
|
+
|
49
|
+
File.bsearch_lines(path, prefix)
|
50
|
+
|
51
|
+
file = open(path)
|
52
|
+
file.bsearch_lines(prefix)
|
53
|
+
```
|
54
|
+
|
55
|
+
If matching lines exist, returns an Array containing them.
|
56
|
+
If none exist, returns an empty Array.
|
57
|
+
|
58
|
+
## Contributing
|
59
|
+
|
60
|
+
1. Fork it ( https://github.com/indeep-xyz/ruby-file-bsearch/fork )
|
61
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
62
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
63
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
64
|
+
5. Create a new Pull Request
|
65
|
+
|
66
|
+
## Author
|
67
|
+
|
68
|
+
[indeep-xyz](http://indeep.xyz/)
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'file_bsearch/version'
|
5
|
+
|
6
|
+
# Gem specification for file_bsearch.
Gem::Specification.new do |gem|
  # identity
  gem.name    = 'file_bsearch'
  gem.version = FileBsearch::VERSION
  gem.license = 'MIT'

  # authorship
  gem.authors = ['indeep-xyz']
  gem.email   = ['indeep.xyz@gmail.com']

  # descriptions
  gem.summary     = 'binary search for sorted text file.'
  gem.description = 'binary search for sorted text file. it is effective when file size is bigger.'
  gem.homepage    = 'https://github.com/indeep-xyz/ruby-file-bsearch/'

  # packaged files: everything tracked by git (NUL-separated listing)
  tracked = `git ls-files -z`.split("\x0")
  gem.files         = tracked
  gem.executables   = tracked.grep(%r{^bin/}) { |entry| File.basename(entry) }
  gem.test_files    = tracked.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # development-only dependencies
  gem.add_development_dependency 'bundler', '~> 1.7'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'rubocop'
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
module FileBsearch
|
2
|
+
class << self
|
3
|
+
|
4
|
+
private
|
5
|
+
|
6
|
+
# Read whole lines located before the current position, walking backward.
#
# The file is walked one BYTE at a time (not one character at a time), so
# multibyte encodings whose bytes never collide with CR/LF (e.g. UTF-8)
# are handled without landing in the middle of a character.
#
# args
#   file ... File-like object; #pos should be at SOL (Start Of Line)
#   size ... rough amount to read, in bytes. Reading stops at the first
#            line terminator met once more than `size` bytes are collected
#
# returns
#   String holding the lines that were read (in file order).
#   Afterwards file#pos is at the start of the returned text: either just
#   after the terminator that stopped the walk, or 0.
def backward_lines(file, size = 512)
  bytes = []

  while file.pos > 0
    file.seek(-1, IO::SEEK_CUR)
    byte = file.getbyte

    # 10 == "\n", 13 == "\r"; stop only after enough data was collected,
    # leaving the pointer right after this terminator
    break if (byte == 10 || byte == 13) && bytes.size > size

    bytes << byte

    # step back over the byte that was just consumed
    file.seek(-1, IO::SEEK_CUR)
  end

  # bytes were gathered last-to-first; restore file order and encoding
  encoding = (file.external_encoding if file.respond_to?(:external_encoding))
  bytes.reverse.pack('C*').force_encoding(encoding || Encoding.default_external)
end
|
34
|
+
|
35
|
+
# Get the whole line that contains the current position.
#
# The pointer is moved backward byte by byte until the terminator of the
# previous line (or the start of the file) is reached, then one line is
# read with #gets. Working on bytes keeps the walk valid even when the
# pointer starts in the middle of a multibyte character.
#
# args
#   file ... File-like object
#
# returns
#   String for the line (terminator included), or nil when nothing can
#   be read at the current position
def current_line(file)
  if file.pos > 0

    # look at the byte just before the pointer
    file.seek(-1, IO::SEEK_CUR)

    loop do
      byte = file.getbyte

      # nil means the pointer is at/after EOF; 10 == "\n", 13 == "\r"
      break if byte.nil? || byte == 10 || byte == 13

      if file.pos > 1
        # undo the read and step one more byte backward
        file.seek(-2, IO::SEEK_CUR)
      else
        # reached the start of the file: the first line begins here
        file.rewind
        break
      end
    end
  end

  file.gets
end
|
64
|
+
|
65
|
+
# Read whole lines forward from the current position.
#
# args
#   file ... File-like object; #pos should be at SOL (Start Of Line)
#   size ... rough amount to read, in characters. Lines keep being read
#            until at least `size` characters are collected or EOF
#
# returns
#   String holding the lines that were read (empty at EOF)
def forward_lines(file, size = 512)
  result = "".dup

  # append in place instead of building a new String for every line
  result << file.gets until result.size >= size || file.eof?

  result
end
|
85
|
+
|
86
|
+
# Binary-search a sorted text file for a line.
#
# args
#   file   ... File-like object
#   needle ... search text (converted with #to_s)
#   min    ... lower bound of the byte range to search
#   max    ... upper bound of the byte range to search (default: file size)
#   block  ... comparison, called with (line without terminator, needle);
#              -1 means the line sorts before the target, 1 means after,
#              anything else is treated as a match.
#              Defaults to `line <=> needle`
#
# returns
#   Integer byte position of the start of the matched line,
#   or false when no line matched
def scan(file, needle, min = 0, max = nil, &block)

  max ||= file.size
  block ||= proc { |a, b| a <=> b }
  needle = needle.to_s
  old_pos = -1

  # reset pointer
  file.rewind

  # the search gives up when
  # - the pointer reaches the upper bound
  # - no line can be read (EOF)
  # - the pointer did not move since the previous probe
  while max > file.pos \
    && (line = current_line(file)) \
    && file.pos != old_pos

    old_pos = file.pos

    # file#pos counts bytes, so the line start must be derived from the
    # byte length of the line (not its character length)
    line_start = old_pos - line.bytesize

    case block.call(line.chomp, needle)
    when -1 then min = old_pos - 1
    when 1 then max = line_start
    else return line_start
    end

    # move to mid point of the narrowed range
    file.seek((min + max) / 2)
  end

  false
end
|
141
|
+
|
142
|
+
# Normalize the search target into a readable, seekable object.
#
# args
#   target ... File object, any IO-like object that responds to #seek and
#              #gets (e.g. StringIO), or a file path
#
# returns
#   `target` itself when it is already File/IO-like, otherwise a newly
#   opened File for `target.to_s`
def to_file(target)

  # already usable as is
  return target if target.is_a?(File)
  return target if target.respond_to?(:seek) && target.respond_to?(:gets)

  # anything else is treated as a file path
  File.open(target.to_s)
end
|
154
|
+
end
|
155
|
+
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
module FileBsearch
|
2
|
+
class << self
|
3
|
+
|
4
|
+
# Get every line that starts with a prefix.
#
# args
#   target ... File object or file path
#   prefix ... leading text of the wanted lines (converted with #to_s)
#   args   ... forwarded to #scan after the needle (min, max)
#
# returns
#   Array of matched lines without terminators; empty when none matched
def get_lines(target, prefix, *args)

  prefix = prefix.to_s
  file = to_file(target)

  # locate any one line with the prefix; only the leading part of each
  # probed line takes part in the comparison
  pos = scan(file, nil, *args) { |line| line[0, prefix.size] <=> prefix }

  return [] unless pos.is_a?(Integer)

  # widen from that line to the whole contiguous run of matches
  needle = Regexp.new("^#{Regexp.escape(prefix)}")
  around_lines(file, pos, needle) || []
ensure
  # close only what was opened here; a caller-supplied object is left open
  file.close if file && !file.equal?(target) && !file.closed?
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
# Get the contiguous run of matching lines around a known match.
#
# args
#   file   ... File-like object
#   pos    ... byte position (file#pos) inside the contiguous range
#   needle ... Regexp used for the contiguity check
#
# returns
#   Array of lines without terminators; empty when nothing was read
def around_lines(file, pos, needle)

  # bounds of the run; fall back to pos when a side finds nothing
  first = scan_contiguous_min(file, pos, needle) || pos
  last = scan_contiguous_max(file, pos, needle) || pos

  file.seek(first)
  chunk = file.read(last - first)

  return [] if chunk.nil? || chunk.empty?

  # a length-limited #read yields a binary String; restore the file's
  # encoding so non-ASCII lines compare equal to ordinary Strings
  encoding = (file.external_encoding if file.respond_to?(:external_encoding))
  chunk.force_encoding(encoding || Encoding.default_external)

  # keep raw bytes when the range does not hold valid text
  chunk.force_encoding(Encoding::BINARY) unless chunk.valid_encoding?

  chunk.chomp.split(/[\r\n]+/)
end
|
51
|
+
|
52
|
+
# Find the lowest file#pos of the contiguous range of matching lines.
#
# args
#   file   ... File-like object
#   pos    ... starting byte position; must be inside the contiguous range
#   needle ... Regexp used for the contiguity check
#   step   ... buffer size handed to #backward_lines for each chunk
#
# returns
#   Integer byte position of the start of the first matching line found
#   before `pos`, or nil when no earlier line matches
def scan_contiguous_min(file, pos, needle, step = 512)

  file.pos = pos
  min = nil

  loop do
    chunk = backward_lines(file, step)
    char_index = chunk.index(needle)

    # no match in this chunk: the range starts at the last value found
    break if char_index.nil?

    # #backward_lines leaves the pointer at the start of the chunk
    chunk_start = file.pos

    # String#index counts characters while file#pos counts bytes, so
    # convert the offset before adding it to the file position
    min = chunk_start + chunk[0, char_index].bytesize

    # a non-matching line precedes the match, or the file start was hit
    break if char_index > 0 || chunk_start < 1
  end

  min
end
|
93
|
+
|
94
|
+
# Find the highest file#pos of the contiguous range of matching lines.
#
# args
#   file   ... File-like object
#   pos    ... starting byte position; must be inside the contiguous range
#   needle ... Regexp used for the contiguity check
#   step   ... buffer size handed to #forward_lines for each chunk
#
# returns
#   Integer byte position of the terminator that ends the last matching
#   line (file size when that line has no terminator),
#   or nil when no line at/after `pos` matches
def scan_contiguous_max(file, pos, needle, step = 512)

  file.pos = pos
  max = nil

  loop do
    # file#pos before #forward_lines
    chunk_start = file.pos

    chunk = forward_lines(file, step)
    match_index = chunk.rindex(needle)

    # no match in this chunk: the range ends at the last value found
    break if match_index.nil?

    eol_index = chunk.index(/[\r\n]/, match_index)

    if eol_index.nil?
      # the last matching line runs to the end of the file unterminated
      max = file.size
      break
    end

    # String#index counts characters while file#pos counts bytes, so
    # convert the offset before adding it to the file position
    max = chunk_start + chunk[0, eol_index].bytesize

    break if file.eof?

    # keep going only while the last matching line is also the last line
    # of the chunk, i.e. nothing but one terminator ("\n", "\r" or
    # "\r\n") follows it
    break if chunk[eol_index..-1] !~ /\A(?:\r\n|\r|\n)\z/
  end

  max
end
|
146
|
+
end
|
147
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module FileBsearch
|
2
|
+
class << self
|
3
|
+
|
4
|
+
# Check whether a matching line exists.
#
# args
#   args  ... forwarded to #index (target, needle, min, max)
#   block ... optional comparison block, forwarded to #index
#
# returns
#   true  ... #index returned an Integer position
#   false ... otherwise
def exist?(*args, &block)
  index(*args, &block).is_a?(Integer)
end
|
15
|
+
|
16
|
+
# Find the position of a matching line.
#
# args
#   target ... File object or file path
#   args   ... forwarded to #scan (needle, min, max)
#   block  ... optional comparison block, forwarded to #scan
#
# returns
#   whatever #scan returns: the position in the target file,
#   or false when nothing matched
def index(target, *args, &block)
  file = to_file(target)

  scan(file, *args, &block)
ensure
  # close only what was opened here; a caller-supplied object is left open
  file.close if file && !file.equal?(target) && !file.closed?
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/file_bsearch.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "file_bsearch/common"
|
2
|
+
require "file_bsearch/get_lines"
|
3
|
+
require "file_bsearch/main"
|
4
|
+
require "file_bsearch/version"
|
5
|
+
|
6
|
+
module FileBsearch
|
7
|
+
end
|
8
|
+
|
9
|
+
# Convenience entry points on File that delegate to FileBsearch.
#
# Class methods take the target (File object or path) as first argument;
# instance methods search the receiver itself.
class File

  # Position of the line matching the needle, or false.
  # An optional comparison block is passed on to FileBsearch.index.
  def self.bsearch(*args, &block)
    FileBsearch.index(*args, &block)
  end

  # Same as File.bsearch, searching this file.
  def bsearch(*args, &block)
    FileBsearch.index(self, *args, &block)
  end

  # true when a line matching the needle exists.
  # An optional comparison block is passed on to FileBsearch.exist?.
  def self.bsearch?(*args, &block)
    FileBsearch.exist?(*args, &block)
  end

  # Same as File.bsearch?, searching this file.
  def bsearch?(*args, &block)
    FileBsearch.exist?(self, *args, &block)
  end

  # Array of the lines that start with the given prefix.
  def self.bsearch_lines(*args)
    FileBsearch.get_lines(*args)
  end

  # Same as File.bsearch_lines, searching this file.
  def bsearch_lines(*args)
    FileBsearch.get_lines(self, *args)
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the FileBsearch module functions and for the File
# class/instance wrappers that delegate to them.
describe FileBsearch do

  let(:path) { '/tmp/file_bsearch_sample.csv' }
  let(:correct) { '9' }
  let(:incorrect) { '!!!!!' }

  # built eagerly: constructing the helper creates the sorted sample
  # file when it does not exist yet
  let!(:helper) { MySpecHelper.new(path) }

  it 'has a version number' do
    expect(FileBsearch::VERSION).not_to be nil
  end

  describe 'module' do

    describe '#exist?' do

      it 'when a record is exists' do
        result = FileBsearch.exist?(path, correct)

        expect(result).to eq(true)
      end

      it 'when a record is not exist' do
        result = FileBsearch.exist?(path, incorrect)

        expect(result).to eq(false)
      end

      it 'when all records are exist' do
        File.foreach(path) do |line|
          # named `record` so that it does not shadow let(:correct)
          record = line.chomp
          result = FileBsearch.exist?(path, record)

          expect(result).to eq(true)
        end
      end
    end

    describe '#index' do

      it 'when returner is number as position in the file' do
        b_result = FileBsearch.index(path, correct)
        h_result = helper.correct?(correct, b_result)

        expect(h_result).to eq(true)
      end

      it 'when returner is false' do
        result = FileBsearch.index(path, incorrect)

        expect(result).to eq(false)
      end
    end

    describe '#get_lines' do

      it 'when lines is exist that with the prefix' do
        result = FileBsearch.get_lines(path, '1000')

        expect(result).to eq(%w[1000 10000])
      end

      it 'when lines is not exist that with the prefix' do
        result = FileBsearch.get_lines(path, '!!!!!')

        expect(result).to eq([])
      end
    end
  end

  describe 'instance method' do

    # File.open instead of Kernel#open; the handle is closed after each
    # example so that the examples do not leak file descriptors
    let(:file) { File.open(path) }

    after { file.close unless file.closed? }

    describe '#bsearch' do

      it 'when returner is number as position in the file' do
        b_result = file.bsearch(correct)
        h_result = helper.correct?(correct, b_result)

        expect(h_result).to eq(true)
      end
    end

    describe '#bsearch?' do

      it 'when a record is exists' do
        result = file.bsearch?(correct)

        expect(result).to eq(true)
      end
    end

    describe '#bsearch_lines' do

      it 'when lines is exist that with the prefix' do
        result = file.bsearch_lines('1000')

        expect(result).to eq(%w[1000 10000])
      end
    end
  end

  describe 'class method' do

    describe '#bsearch' do

      it 'when returner is number as position in the file' do
        b_result = File.bsearch(path, correct)
        h_result = helper.correct?(correct, b_result)

        expect(h_result).to eq(true)
      end
    end

    describe '#bsearch?' do

      it 'when a record is exists' do
        result = File.bsearch?(path, correct)

        expect(result).to eq(true)
      end
    end

    describe '#bsearch_lines' do

      it 'when lines is exist that with the prefix' do
        result = File.bsearch_lines(path, '1000')

        expect(result).to eq(%w[1000 10000])
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Spec support: owns the sample data file and checks search results
# against it.
class MySpecHelper

  attr_reader :path

  # args
  #   path ... location of the sample file (created when missing)
  def initialize(path)
    @path = path

    init_sample_file
  end

  # Create the sample file: the integers 0..10000, one per line, sorted
  # as strings (the order a text binary search needs).
  #
  # returns
  #   nil when the file already exists (it is left untouched),
  #   otherwise the return value of File.write
  def init_sample_file

    # guard
    # - if the file already exists, do nothing
    return nil if File.exist?(@path)

    File.write(@path, (0..10_000).map(&:to_s).sort.join("\n"))
  end

  # Check a position returned by a search.
  #
  # args
  #   str ... string expected at the position
  #   pos ... position in the file (FileBsearch#returner)
  #
  # returns
  #   true when the bytes at `pos` equal `str`;
  #   false otherwise, including when `pos` is not an Integer
  #   (a search that found nothing returns false)
  def correct?(str, pos)
    return false unless pos.is_a?(Integer)

    # length and offset of File.read are byte counts
    str == File.read(@path, str.bytesize, pos)
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: file_bsearch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- indeep-xyz
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-10-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: binary search for sorted text file. it is effective when file size is
|
70
|
+
bigger.
|
71
|
+
email:
|
72
|
+
- indeep.xyz@gmail.com
|
73
|
+
executables: []
|
74
|
+
extensions: []
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- ".gitignore"
|
78
|
+
- ".rspec"
|
79
|
+
- ".rubocop.yml"
|
80
|
+
- ".travis.yml"
|
81
|
+
- Gemfile
|
82
|
+
- LICENSE.txt
|
83
|
+
- README.ja.md
|
84
|
+
- README.md
|
85
|
+
- Rakefile
|
86
|
+
- file_bsearch.gemspec
|
87
|
+
- lib/file_bsearch.rb
|
88
|
+
- lib/file_bsearch/common.rb
|
89
|
+
- lib/file_bsearch/get_lines.rb
|
90
|
+
- lib/file_bsearch/main.rb
|
91
|
+
- lib/file_bsearch/version.rb
|
92
|
+
- spec/file_bsearch_spec.rb
|
93
|
+
- spec/lib/my_spec_helper.rb
|
94
|
+
- spec/spec_helper.rb
|
95
|
+
homepage: https://github.com/indeep-xyz/ruby-file-bsearch/
|
96
|
+
licenses:
|
97
|
+
- MIT
|
98
|
+
metadata: {}
|
99
|
+
post_install_message:
|
100
|
+
rdoc_options: []
|
101
|
+
require_paths:
|
102
|
+
- lib
|
103
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '0'
|
113
|
+
requirements: []
|
114
|
+
rubyforge_project:
|
115
|
+
rubygems_version: 2.2.2
|
116
|
+
signing_key:
|
117
|
+
specification_version: 4
|
118
|
+
summary: binary search for sorted text file.
|
119
|
+
test_files:
|
120
|
+
- spec/file_bsearch_spec.rb
|
121
|
+
- spec/lib/my_spec_helper.rb
|
122
|
+
- spec/spec_helper.rb
|