file_bsearch 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.rubocop.yml +42 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.ja.md +65 -0
- data/README.md +68 -0
- data/Rakefile +7 -0
- data/file_bsearch.gemspec +25 -0
- data/lib/file_bsearch/common.rb +155 -0
- data/lib/file_bsearch/get_lines.rb +147 -0
- data/lib/file_bsearch/main.rb +29 -0
- data/lib/file_bsearch/version.rb +3 -0
- data/lib/file_bsearch.rb +34 -0
- data/spec/file_bsearch_spec.rb +144 -0
- data/spec/lib/my_spec_helper.rb +32 -0
- data/spec/spec_helper.rb +4 -0
- metadata +122 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 47cd4336ef5b84ecf2abcc8ec596ad43245ae679
|
4
|
+
data.tar.gz: 0fab662f8d7ebda542d9d3e7eca1b53b9f7a69d6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e567fbb1af6fd494739b30d4e101c901ef16c53ba08a2b6bd035c2028b14e1bb593ea36460283fcb8bb7012fa5099b5994d219cf221022694b9ce37f841afc7a
|
7
|
+
data.tar.gz: 880234a6190ee6448db8d9fed3d372bd4e7d1812063d8ed11733062d2bbd0aa0712c03bef56d720facab666740c5d0a9e3f83c402399ecd9035a8d0075c3dfce
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# This configuration was generated by `rubocop --auto-gen-config`
|
2
|
+
# on 2014-09-23 13:01:37 +0900 using RuboCop version 0.26.1.
|
3
|
+
# The point is for the user to remove these configuration records
|
4
|
+
# one by one as the offenses are removed from the code base.
|
5
|
+
# Note that changes in the inspected code, or installation of new
|
6
|
+
# versions of RuboCop, may require this file to be generated again.
|
7
|
+
|
8
|
+
# Configuration parameters: CountComments.
|
9
|
+
Metrics/MethodLength:
|
10
|
+
Max: 15
|
11
|
+
|
12
|
+
Style/Documentation:
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
Style/EmptyLinesAroundBody:
|
16
|
+
Enabled: false
|
17
|
+
|
18
|
+
# Configuration parameters: SupportedStyles.
|
19
|
+
Style/HashSyntax:
|
20
|
+
EnforcedStyle: hash_rockets
|
21
|
+
|
22
|
+
# Configuration parameters: EnforcedStyle, MinBodyLength, SupportedStyles.
|
23
|
+
Style/Next:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
# Configuration parameters: PreferredDelimiters.
|
27
|
+
Style/PercentLiteralDelimiters:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Style/RegexpLiteral:
|
31
|
+
MaxSlashes: 0
|
32
|
+
|
33
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
34
|
+
Style/StringLiterals:
|
35
|
+
Enabled: false
|
36
|
+
|
37
|
+
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
38
|
+
Style/TrailingBlankLines:
|
39
|
+
Enabled: false
|
40
|
+
|
41
|
+
Style/UnneededPercentQ:
|
42
|
+
Enabled: false
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 indeep-xyz
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.ja.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
FileBsearch
|
2
|
+
====
|
3
|
+
|
4
|
+
Ruby 用のライブラリです。
|
5
|
+
|
6
|
+
ソート済みのテキストファイルに対してバイナリサーチ (二分探索) をおこないます。巨大なファイルに対して効果的です。
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
```sh
|
11
|
+
gem install file_bsearch
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
### 指定文字列の行が、ファイル内に存在するか
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
FileBsearch.exist?(path, string)
|
20
|
+
|
21
|
+
File.bsearch?(path, string)
|
22
|
+
|
23
|
+
file = open(path)
|
24
|
+
file.bsearch?(string)
|
25
|
+
```
|
26
|
+
|
27
|
+
存在する場合は true 、存在しない場合は false が返ります。
|
28
|
+
|
29
|
+
### 指定文字列の行が、ファイル内のどの位置にあるか
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
FileBsearch.index(path, string)
|
33
|
+
|
34
|
+
File.bsearch(path, string)
|
35
|
+
|
36
|
+
file = open(path)
|
37
|
+
file.bsearch(string)
|
38
|
+
```
|
39
|
+
|
40
|
+
存在する場合はその行の開始位置、存在しない場合は false が返ります。
|
41
|
+
|
42
|
+
### 指定文字列から始まる行の取得
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
FileBsearch.get_lines(path, prefix)
|
46
|
+
|
47
|
+
File.bsearch_lines(path, prefix)
|
48
|
+
|
49
|
+
file = open(path)
|
50
|
+
file.bsearch_lines(prefix)
|
51
|
+
```
|
52
|
+
|
53
|
+
存在する場合はそれらの行の文字列を含んだ配列、存在しない場合は空の配列が返ります。
|
54
|
+
|
55
|
+
## Contributing
|
56
|
+
|
57
|
+
1. Fork it ( https://github.com/indeep-xyz/ruby-file-bsearch/fork )
|
58
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
59
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
60
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
61
|
+
5. Create a new Pull Request
|
62
|
+
|
63
|
+
## Author
|
64
|
+
|
65
|
+
[indeep-xyz](http://indeep.xyz/)
|
data/README.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
FileBsearch
|
2
|
+
====
|
3
|
+
|
4
|
+
A library for Ruby.
|
5
|
+
|
6
|
+
Binary search for sorted text files. It is most effective on large files.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
```sh
|
11
|
+
gem install file_bsearch
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
### Check whether a line identical to the given string exists
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
FileBsearch.exist?(path, string)
|
20
|
+
|
21
|
+
File.bsearch?(path, string)
|
22
|
+
|
23
|
+
file = open(path)
|
24
|
+
file.bsearch?(string)
|
25
|
+
```
|
26
|
+
|
27
|
+
Returns true if such a line exists.
|
28
|
+
Returns false if it does not.
|
29
|
+
|
30
|
+
### Find the position in the file of the line identical to the given string
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
FileBsearch.index(path, string)
|
34
|
+
|
35
|
+
File.bsearch(path, string)
|
36
|
+
|
37
|
+
file = open(path)
|
38
|
+
file.bsearch(string)
|
39
|
+
```
|
40
|
+
|
41
|
+
If the line exists, returns an Integer: the position in the file where the matched line starts.
|
42
|
+
If it does not exist, returns false.
|
43
|
+
|
44
|
+
### Get the lines that start with the given prefix
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
FileBsearch.get_lines(path, prefix)
|
48
|
+
|
49
|
+
File.bsearch_lines(path, prefix)
|
50
|
+
|
51
|
+
file = open(path)
|
52
|
+
file.bsearch_lines(prefix)
|
53
|
+
```
|
54
|
+
|
55
|
+
If matching lines exist, returns an Array containing them.
|
56
|
+
If none exist, returns an empty Array.
|
57
|
+
|
58
|
+
## Contributing
|
59
|
+
|
60
|
+
1. Fork it ( https://github.com/indeep-xyz/ruby-file-bsearch/fork )
|
61
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
62
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
63
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
64
|
+
5. Create a new Pull Request
|
65
|
+
|
66
|
+
## Author
|
67
|
+
|
68
|
+
[indeep-xyz](http://indeep.xyz/)
|
data/Rakefile
ADDED
data/file_bsearch.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'file_bsearch/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "file_bsearch"
|
8
|
+
spec.version = FileBsearch::VERSION
|
9
|
+
spec.authors = ["indeep-xyz"]
|
10
|
+
spec.email = ["indeep.xyz@gmail.com"]
|
11
|
+
spec.summary = %q{binary search for sorted text file.}
|
12
|
+
spec.description = %q{binary search for sorted text file. it is effective when file size is bigger.}
|
13
|
+
spec.homepage = "https://github.com/indeep-xyz/ruby-file-bsearch/"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
22
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
23
|
+
spec.add_development_dependency "rspec"
|
24
|
+
spec.add_development_dependency "rubocop"
|
25
|
+
end
|
data/lib/file_bsearch/common.rb
ADDED
@@ -0,0 +1,155 @@
|
|
1
|
+
module FileBsearch
|
2
|
+
class << self
|
3
|
+
|
4
|
+
private
|
5
|
+
|
6
|
+
# get backward lines
|
7
|
+
#
|
8
|
+
# args
|
9
|
+
# file ... File object
|
10
|
+
# #pos value should be at SOL (Start Of Line)
|
11
|
+
# size ... indication of reading size
|
12
|
+
#
|
13
|
+
# returner
|
14
|
+
# String object as lines
|
15
|
+
# Reads backwards from the current position of +file+ and returns the text
# that precedes it, extended back to a line boundary.
#
# Characters are consumed one at a time towards the start of the file.
# Reading stops at the first line terminator ("\r" or "\n") met after more
# than +size+ characters have been collected, or at the start of the file.
#
# args
#   file ... IO-like object (needs #pos, #seek and #getc)
#            #pos value should be at SOL (Start Of Line)
#   size ... number of characters to collect before a line terminator is
#            allowed to end the read
#
# returner
#   String object as lines; afterwards +file+ is positioned at the first
#   character of the returned text
def backward_lines(file, size = 512)
  # Collect in reverse order and flip once at the end. Prepending to a String
  # on every step (String#insert at index 0) shifts the whole buffer each
  # time, which is quadratic in the amount of text read.
  chars = []

  while file.pos > 0
    file.seek(-1, IO::SEEK_CUR)
    char = file.getc

    break if char =~ /[\r\n]/ && chars.size > size

    chars << char

    # getc moved the pointer forward again; step back over the character
    file.seek(-1, IO::SEEK_CUR)
  end

  chars.reverse.join
end
|
34
|
+
|
35
|
+
# get a line string at current position
|
36
|
+
#
|
37
|
+
# args
|
38
|
+
# file ... File object
|
39
|
+
# Returns the whole line that contains the current position of +file+.
#
# The pointer is first moved back to the start of the line it is in, then
# that line is read with #gets, which leaves the pointer at the start of the
# following line.
#
# args
#   file ... IO-like object (needs #pos, #seek, #getbyte, #rewind, #gets)
#
# returner
#   String object (the line, terminator included), or nil when nothing is
#   left to read
def current_line(file)
  newline = "\n".ord

  if file.pos > 0

    # look at the byte just before the current position
    file.seek(-1, IO::SEEK_CUR)

    # Walk back byte by byte until the "\n" that ends the previous line has
    # been consumed.
    # - bytes (not characters) are read because every seek here is measured
    #   in bytes; reading characters would skip unevenly over multibyte text
    # - only "\n" counts as a terminator, matching how #gets splits lines;
    #   stopping on "\r" would return a bare "\n" whenever the pointer sits
    #   between the "\r" and "\n" of a CRLF pair
    until file.getbyte == newline
      if file.pos > 1
        # undo the read and move one more byte back
        file.seek(-2, IO::SEEK_CUR)
      else
        # reached the start of the file: the first line has no predecessor
        file.rewind
        break
      end
    end
  end

  file.gets
end
|
64
|
+
|
65
|
+
# get forward lines
|
66
|
+
#
|
67
|
+
# args
|
68
|
+
# file ... File object
|
69
|
+
# #pos value should be at SOL (Start Of Line)
|
70
|
+
# size ... indication of reading size
|
71
|
+
#
|
72
|
+
# returner
|
73
|
+
# String object as lines
|
74
|
+
# Reads whole lines forward from the current position of +file+.
#
# Lines are read with #gets until at least +size+ characters have been
# collected or the end of the file is reached, so the result always ends on
# a line boundary (or at EOF).
#
# args
#   file ... IO-like object (needs #gets and #eof?)
#            #pos value should be at SOL (Start Of Line)
#   size ... indication of reading size, in characters
#
# returner
#   String object as lines
def forward_lines(file, size = 512)
  # explicit mutable buffer, appended to in place instead of building a new
  # String for every line
  result = "".dup

  result << file.gets while result.size < size && !file.eof?

  result
end
|
85
|
+
|
86
|
+
# args
|
87
|
+
# file ... File object
|
88
|
+
# needle ... search text
|
89
|
+
# min ... min of file pointer for search
|
90
|
+
# max ... max of file pointer for search
|
91
|
+
# block ... comparison process
|
92
|
+
#
|
93
|
+
# returner
|
94
|
+
# position in file
|
95
|
+
# Binary search over the lines of a sorted text file.
#
# args
#   file   ... File object
#   needle ... search text (converted with #to_s)
#   min    ... min of file pointer for search
#              (the first probe always reads the first line of the file)
#   max    ... max of file pointer for search; defaults to file.size
#   block  ... comparison process, called with (line without terminator,
#              needle). -1 means the line sorts before the target, 1 means
#              it sorts after; any other value is treated as a match.
#              defaults to a <=> b
#
# returner
#   position in file (byte offset of the start of the matched line),
#   or false when no line matched
def scan(file, needle, min = 0, max = nil, &block)
  max ||= file.size
  block ||= proc { |a, b| a <=> b }
  needle = needle.to_s
  old_pos = -1

  # reset pointer
  file.rewind

  # loop for search
  # - if over max point, not found
  # - if EOF, not found
  # - if pointer did not moved, not found
  while max > file.pos &&
        (line = current_line(file)) &&
        file.pos != old_pos

    old_pos = file.pos

    # File positions are byte offsets, so the line has to be measured in
    # bytes too; String#length counts characters and gives a wrong offset as
    # soon as the line contains a multibyte character.
    line_start = old_pos - line.bytesize

    # comparison
    # - if not match, update either range values
    # - if match, return
    case block.call(line.chomp, needle)
    when -1 then min = old_pos - 1
    when 1 then max = line_start
    else return line_start
    end

    # move to mid point
    file.seek((min + max) / 2)
  end

  false
end
|
141
|
+
|
142
|
+
# args
|
143
|
+
# target ... File object || path for String object
|
144
|
+
#
|
145
|
+
# returner
|
146
|
+
# File object
|
147
|
+
# Normalises the search target into something that can be read and seeked.
#
# args
#   target ... File object, another IO-like object that responds to #seek
#              and #gets (e.g. Tempfile, StringIO), or a path (anything
#              whose #to_s is a file path)
#
# returner
#   +target+ itself when it is already file-like, otherwise a File object
#   newly opened for reading; closing that File is up to the caller
def to_file(target)
  return target if target.is_a?(File)

  # Duck typing: a Tempfile or StringIO is not a File, but it can be searched
  # just the same. Without this check it would be turned into a String and
  # opened as if that were a path.
  return target if target.respond_to?(:seek) && target.respond_to?(:gets)

  File.open(target.to_s)
end
|
154
|
+
end
|
155
|
+
end
|
data/lib/file_bsearch/get_lines.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
module FileBsearch
|
2
|
+
class << self
|
3
|
+
|
4
|
+
# returner
|
5
|
+
# Array object
|
6
|
+
# Collects every line of a sorted file that starts with +prefix+.
#
# One matching line is located with #scan, then the contiguous run of
# matching lines around it is read with #around_lines.
#
# args
#   target ... File object || path for String object
#   prefix ... leading text to look for (converted with #to_s)
#   args   ... passed on to #scan (min, max)
#
# returner
#   Array object of matched lines, empty when nothing matched
def get_lines(target, prefix, *args)
  prefix = prefix.to_s
  file = to_file(target)

  # pass to scan method
  # - compare only the leading part of each line with the prefix
  pos = scan(file, nil, *args) do |line|
    line[0, prefix.size] <=> prefix
  end

  return [] unless pos.is_a?(Integer)

  needle = Regexp.new("^#{Regexp.escape(prefix)}")
  around_lines(file, pos, needle) || []
ensure
  # Close only what was opened here: a file object handed in by the caller
  # stays under the caller's control.
  file.close if file && !file.equal?(target)
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
# get lines around for passed file#pos
|
28
|
+
#
|
29
|
+
# args
|
30
|
+
# file ... File object
|
31
|
+
# pos ... starting point for file#pos
|
32
|
+
# require to be within contiguous range
|
33
|
+
# needle ... RegExp object for contiguous check
|
34
|
+
#
|
35
|
+
# returner
|
36
|
+
# Array object
|
37
|
+
# Reads the contiguous run of lines matching +needle+ that surrounds +pos+.
#
# args
#   file   ... File object
#   pos    ... starting point for file#pos
#              require to be within contiguous range
#   needle ... RegExp object for contiguous check
#
# returner
#   Array object of lines without terminators, empty when the range is empty
def around_lines(file, pos, needle)
  # range boundaries; fall back to the starting point when a scan finds none
  first = scan_contiguous_min(file, pos, needle) || pos
  last = scan_contiguous_max(file, pos, needle) || pos

  file.seek(first)
  chunk = file.read(last - first).chomp

  chunk.size > 0 ? chunk.split(/[\r\n]+/) : []
end
|
51
|
+
|
52
|
+
# scan min file#pos of contiguous range.
|
53
|
+
#
|
54
|
+
# args
|
55
|
+
# file ... File object
|
56
|
+
# pos ... starting point for file#pos
|
57
|
+
# require to be within contiguous range
|
58
|
+
# needle ... RegExp object for contiguous check
|
59
|
+
# step ... buffer size for check processing
|
60
|
+
#
|
61
|
+
# returner
|
62
|
+
# Integer object for file#pos
|
63
|
+
# EOS of matched line
|
64
|
+
# scan min file#pos of contiguous range.
#
# Text before +pos+ is read in chunks with #backward_lines. Each chunk is
# searched for the first line matching +needle+; the search moves on to an
# earlier chunk only while the match sits at the very start of the chunk and
# the start of the file has not been reached.
#
# args
#   file   ... File object
#   pos    ... starting point for file#pos
#              require to be within contiguous range
#   needle ... RegExp object for contiguous check
#   step   ... buffer size for check processing
#
# returner
#   Integer object for file#pos (start of the first matched line),
#   or nil when no line before +pos+ matched
def scan_contiguous_min(file, pos, needle, step = 512)
  file.pos = pos
  min = nil

  loop do
    lines = backward_lines(file, step)
    lines_pos = lines.index(needle)

    break if lines_pos.nil?

    # String#index counts characters while file#pos counts bytes, so the
    # text in front of the match is converted to a byte length before it is
    # added to the file position.
    min = file.pos + lines[0, lines_pos].bytesize

    # stop when the match is not at the chunk start (the run begins inside
    # this chunk) or when the start of the file has been reached
    break if lines_pos > 0 || file.pos < 1
  end

  min
end
|
93
|
+
|
94
|
+
# scan max file#pos of contiguous range.
|
95
|
+
#
|
96
|
+
# args
|
97
|
+
# file ... File object
|
98
|
+
# pos ... starting point for file#pos
|
99
|
+
# require to be within contiguous range
|
100
|
+
# needle ... RegExp object for contiguous check
|
101
|
+
# step ... buffer size for check processing
|
102
|
+
#
|
103
|
+
# returner
|
104
|
+
# Integer object for file#pos
|
105
|
+
# EOL of matched line
|
106
|
+
# scan max file#pos of contiguous range.
#
# Text from +pos+ onwards is read in chunks with #forward_lines. Each chunk
# is searched for the last line matching +needle+; the search moves on to
# the next chunk only while that match is the final line of the chunk and
# the end of the file has not been reached.
#
# args
#   file   ... File object
#   pos    ... starting point for file#pos
#              require to be within contiguous range
#   needle ... RegExp object for contiguous check
#   step   ... buffer size for check processing
#
# returner
#   Integer object for file#pos (EOL of the last matched line),
#   or nil when no line from +pos+ onwards matched
def scan_contiguous_max(file, pos, needle, step = 512)
  file.pos = pos
  max = nil

  loop do
    # file#pos before #forward_lines
    pos_old = file.pos

    lines = forward_lines(file, step)
    lines_pos = lines.rindex(needle)

    # if did not match needle
    # - returner is last set value to 'max'
    break if lines_pos.nil?

    # character index of the terminator that ends the last matched line
    lines_end_pos = lines.index(/([\r\n]+?)/, lines_pos)

    # String indexes count characters while file#pos counts bytes, so the
    # text up to the terminator is converted to a byte length before it is
    # added to the file position.
    if file.eof?
      max = lines_end_pos.nil? ? file.size : pos_old + lines[0, lines_end_pos].bytesize
      break
    else
      max = pos_old + lines[0, lines_end_pos].bytesize

      # the last match is not the final line of this chunk: the run ends here
      break if lines_end_pos < lines.size - 1
    end
  end

  max
end
|
146
|
+
end
|
147
|
+
end
|
data/lib/file_bsearch/main.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module FileBsearch
|
2
|
+
class << self
|
3
|
+
|
4
|
+
# args
|
5
|
+
#
|
6
|
+
# returner
|
7
|
+
# true ... data is exist
|
8
|
+
# false ... data is not exist
|
9
|
+
# Tells whether a matching line exists in the target.
#
# args
#   args ... passed on to #index unchanged
#
# returner
#   true  ... #index returned an Integer position
#   false ... #index returned anything else
def exist?(*args)
  found = index(*args)
  found.kind_of?(Integer)
end
|
15
|
+
|
16
|
+
# args
|
17
|
+
# target ... File object || path for String object
|
18
|
+
#
|
19
|
+
# returner
|
20
|
+
# position in target file
|
21
|
+
# Finds the position of the line that matches the search text.
#
# args
#   target ... File object || path for String object
#   args   ... passed on to #scan (needle, min, max)
#   block  ... comparison process passed on to #scan
#
# returner
#   position in target file, or whatever else #scan returns
def index(target, *args, &block)
  file = to_file(target)

  # pass to scan method
  scan(file, *args, &block)
ensure
  # Close only what was opened here: a file object handed in by the caller
  # stays under the caller's control.
  file.close if file && !file.equal?(target)
end
|
28
|
+
end
|
29
|
+
end
|
data/lib/file_bsearch.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "file_bsearch/common"
|
2
|
+
require "file_bsearch/get_lines"
|
3
|
+
require "file_bsearch/main"
|
4
|
+
require "file_bsearch/version"
|
5
|
+
|
6
|
+
module FileBsearch
|
7
|
+
end
|
8
|
+
|
9
|
+
# Convenience entry points on File that delegate to the FileBsearch module.
#
# The class-level methods take the target (File object or path) as their
# first argument; the instance-level methods search the receiver itself.
class File

  # Position of the line matching the given text.
  # Arguments and the optional comparison block go to FileBsearch.index.
  def self.bsearch(*args, &block)
    FileBsearch.index(*args, &block)
  end

  # Same as File.bsearch, searching this file.
  # The block is forwarded so that a custom comparison can be supplied here
  # as well as through FileBsearch.index.
  def bsearch(*args, &block)
    FileBsearch.index(self, *args, &block)
  end

  # Result of FileBsearch.exist? for the given arguments.
  def self.bsearch?(*args)
    FileBsearch.exist?(*args)
  end

  # Same as File.bsearch?, searching this file.
  def bsearch?(*args)
    FileBsearch.exist?(self, *args)
  end

  # Result of FileBsearch.get_lines for the given arguments.
  def self.bsearch_lines(*args)
    FileBsearch.get_lines(*args)
  end

  # Same as File.bsearch_lines, searching this file.
  def bsearch_lines(*args)
    FileBsearch.get_lines(self, *args)
  end
end
|
data/spec/file_bsearch_spec.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FileBsearch do
|
4
|
+
|
5
|
+
let(:path) { '/tmp/file_bsearch_sample.csv' }
|
6
|
+
let(:correct) { '9' }
|
7
|
+
let(:incorrect) { '!!!!!' }
|
8
|
+
|
9
|
+
let!(:helper) { MySpecHelper.new(path) }
|
10
|
+
|
11
|
+
it 'has a version number' do
|
12
|
+
expect(FileBsearch::VERSION).not_to be nil
|
13
|
+
end
|
14
|
+
|
15
|
+
describe 'module' do
|
16
|
+
|
17
|
+
describe '#exist?' do
|
18
|
+
|
19
|
+
it 'when a record is exists' do
|
20
|
+
|
21
|
+
result = FileBsearch.exist?(path, correct)
|
22
|
+
|
23
|
+
expect(result).to eq(true)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'when a record is not exist' do
|
27
|
+
|
28
|
+
result = FileBsearch.exist?(path, incorrect)
|
29
|
+
|
30
|
+
expect(result).to eq(false)
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'when all records are exist' do
|
34
|
+
|
35
|
+
File.foreach(path) do |line|
|
36
|
+
|
37
|
+
correct = line.chomp
|
38
|
+
result = FileBsearch.exist?(path, correct)
|
39
|
+
|
40
|
+
expect(result).to eq(true)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe '#index' do
|
46
|
+
|
47
|
+
it 'when returner is number as position in the file' do
|
48
|
+
|
49
|
+
b_result = FileBsearch.index(path, correct)
|
50
|
+
h_result = helper.correct?(correct, b_result)
|
51
|
+
|
52
|
+
expect(h_result).to eq(true)
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'when returner is false' do
|
56
|
+
|
57
|
+
result = FileBsearch.index(path, incorrect)
|
58
|
+
expect(result).to eq(false)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
describe '#get_lines' do
|
63
|
+
it 'when lines is exist that with the prefix' do
|
64
|
+
|
65
|
+
result = FileBsearch.get_lines(path, '1000')
|
66
|
+
|
67
|
+
expect(result).to eq(%w{1000 10000})
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'when lines is not exist that with the prefix' do
|
71
|
+
|
72
|
+
result = FileBsearch.get_lines(path, '!!!!!')
|
73
|
+
|
74
|
+
expect(result).to eq([])
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
describe 'instance method' do
|
80
|
+
|
81
|
+
let(:file) { open(path) }
|
82
|
+
|
83
|
+
describe '#bsearch' do
|
84
|
+
|
85
|
+
it 'when returner is number as position in the file' do
|
86
|
+
|
87
|
+
b_result = file.bsearch(correct)
|
88
|
+
h_result = helper.correct?(correct, b_result)
|
89
|
+
|
90
|
+
expect(h_result).to eq(true)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe '#bsearch?' do
|
95
|
+
it 'when a record is exists' do
|
96
|
+
|
97
|
+
result = file.bsearch?(correct)
|
98
|
+
|
99
|
+
expect(result).to eq(true)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe '#bsearch_lines' do
|
104
|
+
it 'when lines is exist that with the prefix' do
|
105
|
+
|
106
|
+
result = file.bsearch_lines('1000')
|
107
|
+
|
108
|
+
expect(result).to eq(%w{1000 10000})
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
describe 'class method' do
|
114
|
+
|
115
|
+
describe '#bsearch' do
|
116
|
+
|
117
|
+
it 'when returner is number as position in the file' do
|
118
|
+
|
119
|
+
b_result = File.bsearch(path, correct)
|
120
|
+
h_result = helper.correct?(correct, b_result)
|
121
|
+
|
122
|
+
expect(h_result).to eq(true)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
describe '#bsearch?' do
|
127
|
+
it 'when a record is exists' do
|
128
|
+
|
129
|
+
result = File.bsearch?(path, correct)
|
130
|
+
|
131
|
+
expect(result).to eq(true)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
describe '#bsearch_lines' do
|
136
|
+
it 'when lines is exist that with the prefix' do
|
137
|
+
|
138
|
+
result = File.bsearch_lines(path, '1000')
|
139
|
+
|
140
|
+
expect(result).to eq(%w{1000 10000})
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
data/spec/lib/my_spec_helper.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Spec support: owns the sample data file that the specs search in.
class MySpecHelper

  attr_reader :path

  # path ... location of the sample file; it is created when missing
  def initialize(path)
    @path = path

    init_sample_file
  end

  # Writes the sample file: the numbers 0..10000, one per line, ordered as
  # strings ("0", "1", "10", "100", ...), with no newline after the last one.
  # An existing file is left as it is (returns nil in that case).
  def init_sample_file
    return nil if File.exist?(@path)

    # create sorted string
    numbers = (0..10_000).map(&:to_s).sort

    IO.write(@path, numbers.join("\n"))
  end

  # check returner
  #
  # args
  #   str ... string for correct
  #   pos ... position in the file (FileBsearch#returner)
  #
  # true when the file holds +str+ at offset +pos+
  def correct?(str, pos)
    IO.read(@path, str.size, pos) == str
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: file_bsearch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- indeep-xyz
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-10-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: binary search for sorted text file. it is effective when file size is
|
70
|
+
bigger.
|
71
|
+
email:
|
72
|
+
- indeep.xyz@gmail.com
|
73
|
+
executables: []
|
74
|
+
extensions: []
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- ".gitignore"
|
78
|
+
- ".rspec"
|
79
|
+
- ".rubocop.yml"
|
80
|
+
- ".travis.yml"
|
81
|
+
- Gemfile
|
82
|
+
- LICENSE.txt
|
83
|
+
- README.ja.md
|
84
|
+
- README.md
|
85
|
+
- Rakefile
|
86
|
+
- file_bsearch.gemspec
|
87
|
+
- lib/file_bsearch.rb
|
88
|
+
- lib/file_bsearch/common.rb
|
89
|
+
- lib/file_bsearch/get_lines.rb
|
90
|
+
- lib/file_bsearch/main.rb
|
91
|
+
- lib/file_bsearch/version.rb
|
92
|
+
- spec/file_bsearch_spec.rb
|
93
|
+
- spec/lib/my_spec_helper.rb
|
94
|
+
- spec/spec_helper.rb
|
95
|
+
homepage: https://github.com/indeep-xyz/ruby-file-bsearch/
|
96
|
+
licenses:
|
97
|
+
- MIT
|
98
|
+
metadata: {}
|
99
|
+
post_install_message:
|
100
|
+
rdoc_options: []
|
101
|
+
require_paths:
|
102
|
+
- lib
|
103
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '0'
|
113
|
+
requirements: []
|
114
|
+
rubyforge_project:
|
115
|
+
rubygems_version: 2.2.2
|
116
|
+
signing_key:
|
117
|
+
specification_version: 4
|
118
|
+
summary: binary search for sorted text file.
|
119
|
+
test_files:
|
120
|
+
- spec/file_bsearch_spec.rb
|
121
|
+
- spec/lib/my_spec_helper.rb
|
122
|
+
- spec/spec_helper.rb
|