searchyj 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +37 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.ja.md +101 -0
- data/README.md +106 -0
- data/Rakefile +1 -0
- data/bin/console +14 -0
- data/bin/searchyj +4 -0
- data/bin/setup +7 -0
- data/lib/searchyj/cli.rb +69 -0
- data/lib/searchyj/main.rb +65 -0
- data/lib/searchyj/record_sorter.rb +50 -0
- data/lib/searchyj/searcher.rb +113 -0
- data/lib/searchyj/unique_logger.rb +69 -0
- data/lib/searchyj/uri_manager.rb +104 -0
- data/lib/searchyj/version.rb +3 -0
- data/lib/searchyj.rb +22 -0
- data/searchyj.gemspec +27 -0
- metadata +135 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bb887209fc4783aa50c14aa5632f5bc76b15051e
|
4
|
+
data.tar.gz: 20334940910579439b6e68d58c635bce524d95c2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8fe93ca7c69a7e7df5ad02799933043f971b3ed677196d251b06bd6538aacb566898011ece9c7463a3c5a3298c4a16049ce3c0b71b8969e64c2daf12a7fa71af
|
7
|
+
data.tar.gz: 4101ef48646eacd94c740d16bad68d031db19796950a053013b0816d91f142213dc25fb9947673a5c1f0ae7fab69cbde34ad1a20fc3f5c95f2637aba36727162
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
AllCops:
|
2
|
+
Exclude:
|
3
|
+
- 'vendor/**/*'
|
4
|
+
- '*.gemspec'
|
5
|
+
|
6
|
+
Metrics/LineLength:
|
7
|
+
Max: 80
|
8
|
+
|
9
|
+
Metrics/MethodLength:
|
10
|
+
Max: 16
|
11
|
+
|
12
|
+
Style/Documentation:
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
Style/FirstParameterIndentation:
|
16
|
+
Enabled: false
|
17
|
+
|
18
|
+
Style/GuardClause:
|
19
|
+
MinBodyLength: 4
|
20
|
+
|
21
|
+
Style/HashSyntax:
|
22
|
+
Enabled: false
|
23
|
+
|
24
|
+
Style/IfUnlessModifier:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
Style/IndentHash:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Style/PercentLiteralDelimiters:
|
31
|
+
Enabled: false
|
32
|
+
|
33
|
+
Style/StringLiterals:
|
34
|
+
Enabled: false
|
35
|
+
|
36
|
+
Style/UnneededPercentQ:
|
37
|
+
Enabled: false
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 indeep-xyz
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.ja.md
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
SearchYJ
|
2
|
+
====
|
3
|
+
|
4
|
+
Search on Yahoo Japan.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Gemfile に次の一行を書き加えてください。
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'searchyj'
|
12
|
+
```
|
13
|
+
|
14
|
+
そして以下を実行。
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
もしくは、以下のようにしてインストール。
|
19
|
+
|
20
|
+
$ gem install searchyj
|
21
|
+
|
22
|
+
## Usage (CLI)
|
23
|
+
|
24
|
+
出力結果は基本的に JSON 形式で、以下のパラメータを持ちます。
|
25
|
+
|
26
|
+
- uri
|
27
|
+
- ウェブサイトの URI。
|
28
|
+
|
29
|
+
|
30
|
+
- title
|
31
|
+
- ウェブサイトのタイトル。
|
32
|
+
- 検索結果のタイトルをそのまま使っているため、省略形の場合があります。
|
33
|
+
|
34
|
+
|
35
|
+
- rank
|
36
|
+
- 検索順位。
|
37
|
+
- SearchYJ では検索に混じる広告を排除しながらレコードを拾っています。この機能が貧弱なため、多少値がずれることがあります。
|
38
|
+
|
39
|
+
|
40
|
+
### list
|
41
|
+
|
42
|
+
検索結果を指定の個数揃えて出力します。
|
43
|
+
|
44
|
+
$ searchyj list [options] <SearchTerm>
|
45
|
+
|
46
|
+
検索にひとつも引っかからない場合は空の配列を表す文字列が出力されます。
|
47
|
+
|
48
|
+
#### --size, -s
|
49
|
+
|
50
|
+
検索結果のサイズです。
|
51
|
+
|
52
|
+
一度の検索でこの値に満たなかった場合、この値に届くまで検索を繰り返します。値に到達するより先に検索が最後まで到達した場合には、それまでに集めた検索結果を返します。
|
53
|
+
|
54
|
+
未設定時は 10 です。
|
55
|
+
|
56
|
+
#### --from, -f
|
57
|
+
|
58
|
+
指定の検索順位から検索を開始します。
|
59
|
+
|
60
|
+
|
61
|
+
### detect
|
62
|
+
|
63
|
+
条件に合う検索結果を探し、最初に合った結果を出力します。
|
64
|
+
|
65
|
+
$ searchyj detect [options] <SearchTerm>
|
66
|
+
|
67
|
+
検索にひとつも引っかからない場合は文字列 null が出力されます。
|
68
|
+
|
69
|
+
#### --regexp, -r
|
70
|
+
|
71
|
+
マッチさせたい正規表現用の文字列です。
|
72
|
+
|
73
|
+
必須です。
|
74
|
+
|
75
|
+
#### --key, -k
|
76
|
+
|
77
|
+
比較対象のキー名です。 'title' か 'uri' を渡すことができます。
|
78
|
+
|
79
|
+
未設定時は 'title' となります。
|
80
|
+
|
81
|
+
### at_rank
|
82
|
+
|
83
|
+
指定順位の検索結果を出力します。
|
84
|
+
|
85
|
+
$ searchyj at_rank [options] <SearchTerm>
|
86
|
+
|
87
|
+
検索にひとつも引っかからない場合は文字列 null が出力されます。
|
88
|
+
|
89
|
+
#### --rank, -r
|
90
|
+
|
91
|
+
出力したい順位です。
|
92
|
+
|
93
|
+
必須です。
|
94
|
+
|
95
|
+
## Usage (Programming)
|
96
|
+
|
97
|
+
'lib/searchyj.rb' やその他を読んでください。
|
98
|
+
|
99
|
+
## Author
|
100
|
+
|
101
|
+
[indeep-xyz](http://blog.indeep.xyz/)
|
data/README.md
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
SearchYJ
|
2
|
+
====
|
3
|
+
|
4
|
+
Search on Yahoo Japan.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
Add this line to your application's Gemfile:
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
gem 'searchyj'
|
12
|
+
```
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install searchyj
|
21
|
+
|
22
|
+
## Usage (CLI)
|
23
|
+
|
24
|
+
The output format is generally JSON.
|
25
|
+
|
26
|
+
Each record has the following parameters.
|
27
|
+
|
28
|
+
- uri
|
29
|
+
- The URI of the web site.
|
30
|
+
|
31
|
+
|
32
|
+
- title
|
33
|
+
- The title of the web site.
|
34
|
+
- The title might be abbreviated, because it is taken verbatim from the search result.
|
35
|
+
|
36
|
+
|
37
|
+
- rank
|
38
|
+
- The rank order in the search ranking.
|
39
|
+
- This number might be slightly off. SearchYJ picks up the records while excluding the advertisements mixed into the search result, and this exclusion feature is still rough.
|
40
|
+
|
41
|
+
|
42
|
+
### list
|
43
|
+
|
44
|
+
Collect the specified number of search results and print them.
|
45
|
+
|
46
|
+
$ searchyj list [options] <SearchTerm>
|
47
|
+
|
48
|
+
If the search returns no results, print the string representation of an empty Array.
|
49
|
+
|
50
|
+
#### --size, -s
|
51
|
+
|
52
|
+
The size of the result records.
|
53
|
+
|
54
|
+
If the number of search results is less than this option's value, keep searching the next pages until the total number of results reaches the option's value.
|
55
|
+
|
56
|
+
If the search reaches its end before the option's value is reached, print the records collected up to that point.
|
57
|
+
|
58
|
+
The default value is 10.
|
59
|
+
|
60
|
+
|
61
|
+
#### --from, -f
|
62
|
+
|
63
|
+
Start searching from this rank in the search ranking. The default value is 1.
|
64
|
+
|
65
|
+
|
66
|
+
### detect
|
67
|
+
|
68
|
+
Look for search results that match the given conditions and print the first matching record.
|
69
|
+
|
70
|
+
$ searchyj detect [options] <SearchTerm>
|
71
|
+
|
72
|
+
If the search returns no results or no record matches, print the string null.
|
73
|
+
|
74
|
+
#### --regexp, -r
|
75
|
+
|
76
|
+
A string used as a regular expression to match against the value of record[key].
|
77
|
+
|
78
|
+
The option is required.
|
79
|
+
|
80
|
+
#### --key, -k
|
81
|
+
|
82
|
+
The key name for comparing values. You can pass either 'title' or 'uri'.
|
83
|
+
|
84
|
+
The default value is 'title'.
|
85
|
+
|
86
|
+
### at_rank
|
87
|
+
|
88
|
+
Print a record of the search result at a particular rank order in the search ranking.
|
89
|
+
|
90
|
+
$ searchyj at_rank [options] <SearchTerm>
|
91
|
+
|
92
|
+
If the search returns no results or there is no record at that rank, print the string null.
|
93
|
+
|
94
|
+
#### --rank, -r
|
95
|
+
|
96
|
+
The rank order in the search ranking.
|
97
|
+
|
98
|
+
The option is required.
|
99
|
+
|
100
|
+
## Usage (Programming)
|
101
|
+
|
102
|
+
Please read 'lib/searchyj.rb' and others.
|
103
|
+
|
104
|
+
## Author
|
105
|
+
|
106
|
+
[indeep-xyz](http://blog.indeep.xyz/)
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "searchyj"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/searchyj
ADDED
data/bin/setup
ADDED
data/lib/searchyj/cli.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'searchyj'
|
2
|
+
require 'thor'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module SearchYJ
|
6
|
+
class CLI < Thor
|
7
|
+
desc 'at_rank',
|
8
|
+
"Get a record in the search result\n" \
|
9
|
+
'at a particular rank order in the search ranking.'
|
10
|
+
option :rank,
|
11
|
+
type: :numeric,
|
12
|
+
required: true,
|
13
|
+
aliases: '-r',
|
14
|
+
desc: 'The rank order in the search ranking'
|
15
|
+
def at_rank(term)
|
16
|
+
rank = options[:rank]
|
17
|
+
|
18
|
+
puts JSON.dump(
|
19
|
+
result = SearchYJ.at_rank(term, rank)
|
20
|
+
)
|
21
|
+
end
|
22
|
+
|
23
|
+
desc 'detect',
|
24
|
+
"Detect a first record that\n" \
|
25
|
+
'meet the conditions of a regexp and a key.'
|
26
|
+
option :regexp,
|
27
|
+
type: :string,
|
28
|
+
required: true,
|
29
|
+
aliases: '-r',
|
30
|
+
desc: 'Regexp that want to match with value of a record'
|
31
|
+
option :key,
|
32
|
+
type: :string,
|
33
|
+
default: 'title',
|
34
|
+
aliases: '-k',
|
35
|
+
desc: \
|
36
|
+
'The key name for comparing values. ' \
|
37
|
+
'You can pass any of \'title\' or \'uri\'. '
|
38
|
+
def detect(term)
|
39
|
+
key = options[:key]
|
40
|
+
regexp = Regexp.new(options[:regexp])
|
41
|
+
|
42
|
+
puts JSON.dump(
|
43
|
+
SearchYJ.detect(term, regexp, key)
|
44
|
+
)
|
45
|
+
end
|
46
|
+
|
47
|
+
desc 'list',
|
48
|
+
'Get records of the search result.'
|
49
|
+
option :size,
|
50
|
+
type: :numeric,
|
51
|
+
default: 10,
|
52
|
+
aliases: '-s',
|
53
|
+
desc: 'The size of the returner'
|
54
|
+
option :from,
|
55
|
+
type: :numeric,
|
56
|
+
default: 1,
|
57
|
+
aliases: '-f',
|
58
|
+
desc: \
|
59
|
+
'Start to search from this number of the search ranking'
|
60
|
+
def list(term)
|
61
|
+
size = options[:size]
|
62
|
+
from = options[:from]
|
63
|
+
|
64
|
+
puts JSON.dump(
|
65
|
+
SearchYJ.list(term, size, from)
|
66
|
+
)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'searchyj/searcher'
|
2
|
+
|
3
|
+
module SearchYJ
|
4
|
+
class Main
|
5
|
+
# Get a record in the search result
|
6
|
+
# at a particular rank order in the search ranking.
|
7
|
+
# @param term [String] Search term
|
8
|
+
# @param rank [Integer] The rank order in the search ranking
|
9
|
+
#
|
10
|
+
# @return [Hash]
|
11
|
+
# A result record if matched the arguments
|
12
|
+
# Else nil
|
13
|
+
def at_rank(term, rank)
|
14
|
+
result = list(term, 1, rank)
|
15
|
+
(result.size > 0) ? result[0] : nil
|
16
|
+
end
|
17
|
+
|
18
|
+
# Detect the first record that
|
19
|
+
# meets the conditions of a regexp and a key.
|
20
|
+
# @param term [String] Search term
|
21
|
+
# @param regexp [Regexp] Want to match with value of a record[key]
|
22
|
+
# @param key [Symbol] The key name for comparing values
|
23
|
+
#
|
24
|
+
# @return [Hash]
|
25
|
+
# A result record if matched the arguments
|
26
|
+
# Else nil
|
27
|
+
def detect(term, regexp, key = :title)
|
28
|
+
key = key.to_sym unless key.is_a?(Symbol)
|
29
|
+
|
30
|
+
searcher = Searcher.new
|
31
|
+
searcher.uri.search_term = term
|
32
|
+
|
33
|
+
searcher.run do |record|
|
34
|
+
if regexp.match(record[key])
|
35
|
+
return record
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
nil
|
40
|
+
end
|
41
|
+
|
42
|
+
# Get records of the search result.
|
43
|
+
# @param term [String] Search term
|
44
|
+
# @param size [Integer] The maximum number of records to return
|
45
|
+
# @param start_index [Integer]
|
46
|
+
# Start to search from this number of the search ranking
|
47
|
+
#
|
48
|
+
# @return [Array]
|
49
|
+
# Includes the result records
|
50
|
+
def list(term, size = 10, start_index = 1)
|
51
|
+
searcher = Searcher.new
|
52
|
+
searcher.uri.search_term = term
|
53
|
+
searcher.uri.index = start_index
|
54
|
+
searcher.page_size = 10
|
55
|
+
list = []
|
56
|
+
|
57
|
+
searcher.run do |record|
|
58
|
+
list << record
|
59
|
+
break if list.size >= size
|
60
|
+
end
|
61
|
+
|
62
|
+
list
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'searchyj/unique_logger'
|
2
|
+
|
3
|
+
module SearchYJ
|
4
|
+
#
|
5
|
+
# Number the records by rank and pass each unique one to a block
|
6
|
+
#
|
7
|
+
# @author [indeep-xyz]
|
8
|
+
#
|
9
|
+
class RecordSorter
|
10
|
+
attr_accessor :rank
|
11
|
+
|
12
|
+
# Initialize myself
|
13
|
+
# @param rank [Integer]
|
14
|
+
# The starting number of the rank of the records
|
15
|
+
# @param logger_size [Integer]
|
16
|
+
# The logging size of UniqueLogger
|
17
|
+
#
|
18
|
+
# @return [type] [description]
|
19
|
+
def initialize(rank, logger_size)
|
20
|
+
@rank = rank
|
21
|
+
initialize_logger(logger_size)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Initialize the instance of UniqueLogger
|
25
|
+
# @param size [Integer] The logging size of UniqueLogger
|
26
|
+
#
|
27
|
+
# @return [type] [description]
|
28
|
+
def initialize_logger(size)
|
29
|
+
@logger = UniqueLogger.new(size)
|
30
|
+
end
|
31
|
+
|
32
|
+
# Return the page gap: the number of records skipped as duplicates.
|
33
|
+
# And reset logger's count.
|
34
|
+
#
|
35
|
+
# @return [Integer]
|
36
|
+
def page_gap
|
37
|
+
@logger.failure_count(true)
|
38
|
+
end
|
39
|
+
|
40
|
+
def run(records, &block)
|
41
|
+
records.each do |record|
|
42
|
+
next if @logger.add(record[:uri]) == false
|
43
|
+
|
44
|
+
record[:rank] = @rank
|
45
|
+
block.call(record)
|
46
|
+
@rank += 1
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'searchyj/uri_manager'
|
4
|
+
require 'searchyj/record_sorter'
|
5
|
+
|
6
|
+
module SearchYJ
|
7
|
+
#
|
8
|
+
# Search from the search engine,
|
9
|
+
# parse HTML,
|
10
|
+
# and walk through the following pages
|
11
|
+
#
|
12
|
+
# @author [indeep-xyz]
|
13
|
+
#
|
14
|
+
class Searcher
|
15
|
+
attr_reader :results
|
16
|
+
attr_accessor :limit_loop, :user_agent, :sleep_time, :page_size, :uri
|
17
|
+
USER_AGENT = \
|
18
|
+
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0)' \
|
19
|
+
'Gecko/20100101 Firefox/38.0'
|
20
|
+
|
21
|
+
OpenUriError = Class.new(StandardError)
|
22
|
+
|
23
|
+
# Initialize myself.
|
24
|
+
def initialize
|
25
|
+
@uri = UriManager.new
|
26
|
+
@limit_loop = 10
|
27
|
+
@user_agent = USER_AGENT
|
28
|
+
@sleep_time = 1
|
29
|
+
@page_size = 10
|
30
|
+
end
|
31
|
+
|
32
|
+
def run(&block)
|
33
|
+
loop_count = 0
|
34
|
+
sorter = RecordSorter.new(@uri.index, @page_size)
|
35
|
+
|
36
|
+
while loop_count < @limit_loop
|
37
|
+
fetch_html
|
38
|
+
records = extract_records
|
39
|
+
|
40
|
+
sorter.run(records, &block)
|
41
|
+
|
42
|
+
if records.empty? || final_page?
|
43
|
+
break
|
44
|
+
end
|
45
|
+
|
46
|
+
next_page(records.size + sorter.page_gap)
|
47
|
+
sleep @sleep_time
|
48
|
+
loop_count += 1
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
# Extract and optimize the records
|
55
|
+
# from my own HTML instance.
|
56
|
+
#
|
57
|
+
# @return [Array]
|
58
|
+
# Each element is a Hash with the keys :uri and :title
|
59
|
+
def extract_records
|
60
|
+
results = []
|
61
|
+
nodes = @html.css('#WS2m>.w h3 a')
|
62
|
+
|
63
|
+
nodes.each do |node|
|
64
|
+
results.push(
|
65
|
+
uri: node.attribute('href').text,
|
66
|
+
title: node.text
|
67
|
+
)
|
68
|
+
end
|
69
|
+
|
70
|
+
results
|
71
|
+
end
|
72
|
+
|
73
|
+
# Download raw HTML from YJ and return it.
|
74
|
+
#
|
75
|
+
# @return [String] raw HTML
|
76
|
+
def download_raw_html
|
77
|
+
uri = @uri.to_s
|
78
|
+
params = {
|
79
|
+
'User-Agent' => @user_agent
|
80
|
+
}
|
81
|
+
|
82
|
+
open(uri, params) do |f|
|
83
|
+
fail OpenUriError unless f.status[0] == '200'
|
84
|
+
f.read
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
# Download HTML from YJ
|
89
|
+
# and set the parsed HTML data to my own instance.
|
90
|
+
def fetch_html
|
91
|
+
raw_html = download_raw_html
|
92
|
+
@html = Nokogiri::HTML.parse(raw_html, nil, 'UTF-8')
|
93
|
+
end
|
94
|
+
|
95
|
+
# Check whether the next page is missing, i.e. this is the final page.
|
96
|
+
#
|
97
|
+
# @return [bool]
|
98
|
+
# It is true unless the navigation element
|
99
|
+
# for the next page exists.
|
100
|
+
# Else false.
|
101
|
+
def final_page?
|
102
|
+
a = @html.css('#Sp1 .m a').last
|
103
|
+
|
104
|
+
!(a.is_a?(Nokogiri::XML::Element) &&
|
105
|
+
a.text.include?('次へ'))
|
106
|
+
end
|
107
|
+
|
108
|
+
# Move to the next page.
|
109
|
+
def next_page(page_size)
|
110
|
+
@uri.move_index(page_size)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module SearchYJ
|
2
|
+
#
|
3
|
+
# Logging unique data
|
4
|
+
#
|
5
|
+
# @author [indeep-xyz]
|
6
|
+
#
|
7
|
+
class UniqueLogger
|
8
|
+
# Initialize myself.
|
9
|
+
# @param limit [Integer] The limit of the log
|
10
|
+
def initialize(limit)
|
11
|
+
@limit = limit
|
12
|
+
reset
|
13
|
+
end
|
14
|
+
|
15
|
+
# Add to log.
|
16
|
+
# If the value is already in the log, it is not added and the failure count is incremented.
|
17
|
+
# @param value [Object] The value to add to the log
|
18
|
+
#
|
19
|
+
# @return [Object]
|
20
|
+
# False if could not add the value.
|
21
|
+
def add(value)
|
22
|
+
if exist?(value)
|
23
|
+
@failure_count += 1
|
24
|
+
return false
|
25
|
+
end
|
26
|
+
|
27
|
+
@log << value
|
28
|
+
@log.shift if @log.length > @limit
|
29
|
+
end
|
30
|
+
|
31
|
+
# Return the size of log.
|
32
|
+
# @return [Integer] The size of @log
|
33
|
+
def length
|
34
|
+
@log.length
|
35
|
+
end
|
36
|
+
|
37
|
+
# Reset my own log data.
|
38
|
+
def reset
|
39
|
+
@log = []
|
40
|
+
reset_failure_count
|
41
|
+
end
|
42
|
+
|
43
|
+
# Return the number of the failure count.
|
44
|
+
# @param with_reset [Boolean] If true, reset failure count
|
45
|
+
#
|
46
|
+
# @return [Integer] The number of failure count
|
47
|
+
def failure_count(with_reset = false)
|
48
|
+
n = @failure_count
|
49
|
+
reset_failure_count if with_reset
|
50
|
+
|
51
|
+
n
|
52
|
+
end
|
53
|
+
|
54
|
+
# Reset the failure count.
|
55
|
+
def reset_failure_count
|
56
|
+
@failure_count = 0
|
57
|
+
end
|
58
|
+
|
59
|
+
# Check whether the value is in @log
|
60
|
+
# @param value [Object]
|
61
|
+
#
|
62
|
+
# @return [Boolean]
|
63
|
+
# True if the argument found.
|
64
|
+
# False else.
|
65
|
+
def exist?(value)
|
66
|
+
@log.include?(value)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
module SearchYJ
|
4
|
+
#
|
5
|
+
# Manage the URI instance
|
6
|
+
#
|
7
|
+
# @author [indeep-xyz]
|
8
|
+
#
|
9
|
+
class UriManager
|
10
|
+
URI_BASE = 'http://search.yahoo.co.jp/search'
|
11
|
+
QUERY_DEFAULT = {
|
12
|
+
# Search term
|
13
|
+
p: nil,
|
14
|
+
# Character encoding
|
15
|
+
ei: 'UTF-8',
|
16
|
+
# Suppress the suggestion to rewrite the search term
|
17
|
+
qrw: 0,
|
18
|
+
# Flag for offset (?)
|
19
|
+
pstart: 1,
|
20
|
+
# Offset
|
21
|
+
# - if less than or equal to 1,
|
22
|
+
# - the search result is from first
|
23
|
+
b: 1
|
24
|
+
}
|
25
|
+
|
26
|
+
IndexError = Class.new(StandardError)
|
27
|
+
PageSizeError = Class.new(StandardError)
|
28
|
+
SearchTermError = Class.new(StandardError)
|
29
|
+
|
30
|
+
def initialize(query = {})
|
31
|
+
@query = QUERY_DEFAULT.merge(query)
|
32
|
+
end
|
33
|
+
|
34
|
+
def search_term
|
35
|
+
@query[:p]
|
36
|
+
end
|
37
|
+
|
38
|
+
def index
|
39
|
+
(@query[:b] < 1) ? 1 : @query[:b]
|
40
|
+
end
|
41
|
+
|
42
|
+
def base
|
43
|
+
URI_BASE
|
44
|
+
end
|
45
|
+
|
46
|
+
def search_term=(search_term)
|
47
|
+
fail SearchTermError unless search_term.is_a?(String)
|
48
|
+
|
49
|
+
@query[:p] = search_term
|
50
|
+
end
|
51
|
+
|
52
|
+
def index=(index)
|
53
|
+
fail IndexError unless index.is_a?(Integer)
|
54
|
+
fail IndexError if index < 1
|
55
|
+
|
56
|
+
@query[:b] = index
|
57
|
+
end
|
58
|
+
|
59
|
+
def to_s
|
60
|
+
uri = URI(URI_BASE)
|
61
|
+
uri.query = create_query_string
|
62
|
+
uri.to_s
|
63
|
+
end
|
64
|
+
|
65
|
+
def move_index(distance)
|
66
|
+
@query[:b] += distance
|
67
|
+
@query[:b] = 1 if @query[:b] < 1
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
def optimize_query
|
73
|
+
query = @query.dup
|
74
|
+
|
75
|
+
query[:b] = query[:b].to_i
|
76
|
+
|
77
|
+
if query[:b].nil? || query[:b] < 2
|
78
|
+
query.delete(:b)
|
79
|
+
query.delete(:pstart)
|
80
|
+
end
|
81
|
+
|
82
|
+
query
|
83
|
+
end
|
84
|
+
|
85
|
+
def create_query_string
|
86
|
+
query = optimize_query
|
87
|
+
stock = []
|
88
|
+
|
89
|
+
query.each do |k, v|
|
90
|
+
next if available_value?(v)
|
91
|
+
|
92
|
+
k = k.to_s unless k.is_a?(String)
|
93
|
+
v = v.to_s unless v.is_a?(String)
|
94
|
+
stock << "#{CGI.escape(k)}=#{CGI.escape(v)}"
|
95
|
+
end
|
96
|
+
|
97
|
+
stock.compact * '&'
|
98
|
+
end
|
99
|
+
|
100
|
+
def available_value?(v)
|
101
|
+
(v.is_a?(Hash) || v.is_a?(Array)) && v.empty?
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
data/lib/searchyj.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require "searchyj/version"
|
2
|
+
require "searchyj/main"
|
3
|
+
require "searchyj/cli"
|
4
|
+
|
5
|
+
module SearchYJ
|
6
|
+
module_function
|
7
|
+
|
8
|
+
def list(term, size = 10, start_index = 1)
|
9
|
+
mgr = SearchYJ::Main.new
|
10
|
+
mgr.list(term, size, start_index)
|
11
|
+
end
|
12
|
+
|
13
|
+
def at_rank(term, rank)
|
14
|
+
mgr = SearchYJ::Main.new
|
15
|
+
mgr.at_rank(term, rank)
|
16
|
+
end
|
17
|
+
|
18
|
+
def detect(term, regexp, key = :title)
|
19
|
+
mgr = SearchYJ::Main.new
|
20
|
+
mgr.detect(term, regexp, key)
|
21
|
+
end
|
22
|
+
end
|
data/searchyj.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'searchyj/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "searchyj"
|
8
|
+
spec.version = SearchYJ::VERSION
|
9
|
+
spec.authors = ["indeep-xyz"]
|
10
|
+
spec.email = ["indeep.xyz@gmail.com"]
|
11
|
+
|
12
|
+
spec.summary = %q{Search on Yahoo Japan}
|
13
|
+
spec.description = %q{Search on Yahoo Japan}
|
14
|
+
spec.homepage = "https://github.com/indeep-xyz/searchyj"
|
15
|
+
spec.license = "MIT"
|
16
|
+
|
17
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
|
+
spec.bindir = "bin"
|
19
|
+
spec.executables = ['searchyj']
|
20
|
+
spec.require_paths = ["lib"]
|
21
|
+
|
22
|
+
spec.add_dependency "nokogiri", '~> 1.6.6.2'
|
23
|
+
spec.add_dependency "thor", '~> 0.19.1'
|
24
|
+
spec.add_development_dependency "bundler", "~> 1.10"
|
25
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
26
|
+
spec.add_development_dependency "rubocop", "~> 0.32.0"
|
27
|
+
end
|
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: searchyj
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- indeep-xyz
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-06-22 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.6.6.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.6.6.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: thor
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.19.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.19.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.10'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubocop
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.32.0
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.32.0
|
83
|
+
description: Search on Yahoo Japan
|
84
|
+
email:
|
85
|
+
- indeep.xyz@gmail.com
|
86
|
+
executables:
|
87
|
+
- searchyj
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rubocop.yml"
|
93
|
+
- ".travis.yml"
|
94
|
+
- Gemfile
|
95
|
+
- LICENSE.txt
|
96
|
+
- README.ja.md
|
97
|
+
- README.md
|
98
|
+
- Rakefile
|
99
|
+
- bin/console
|
100
|
+
- bin/searchyj
|
101
|
+
- bin/setup
|
102
|
+
- lib/searchyj.rb
|
103
|
+
- lib/searchyj/cli.rb
|
104
|
+
- lib/searchyj/main.rb
|
105
|
+
- lib/searchyj/record_sorter.rb
|
106
|
+
- lib/searchyj/searcher.rb
|
107
|
+
- lib/searchyj/unique_logger.rb
|
108
|
+
- lib/searchyj/uri_manager.rb
|
109
|
+
- lib/searchyj/version.rb
|
110
|
+
- searchyj.gemspec
|
111
|
+
homepage: https://github.com/indeep-xyz/searchyj
|
112
|
+
licenses:
|
113
|
+
- MIT
|
114
|
+
metadata: {}
|
115
|
+
post_install_message:
|
116
|
+
rdoc_options: []
|
117
|
+
require_paths:
|
118
|
+
- lib
|
119
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
requirements: []
|
130
|
+
rubyforge_project:
|
131
|
+
rubygems_version: 2.2.2
|
132
|
+
signing_key:
|
133
|
+
specification_version: 4
|
134
|
+
summary: Search on Yahoo Japan
|
135
|
+
test_files: []
|