seiya 0.0.7.5 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4d611112f85f93780736302a06e67091bb3114a4
4
- data.tar.gz: 941172b9cd341b05e0af011c1a44aadef7aa3ee2
3
+ metadata.gz: 2226d4f76aa277059e8da84420def305d0ab0be2
4
+ data.tar.gz: 652ac53f990da56b552d42ec18b5d54d887e75d0
5
5
  SHA512:
6
- metadata.gz: d1ee4335d3b8d2c4534117e56b0a874bdca536ed1779813a9aa613f8e36fb53feb167d1b0a6ed4b85f7ff252d608a5af4f0e120898978773f474876ccddb2dbf
7
- data.tar.gz: 92a7195643cc5231e23f639b0409289e174a6e3f80051b3bc8471ca034e5a200a9e02c8ba84a740680b46920a62ebb9d96c2035feab209c616fbb74d87b74e61
6
+ metadata.gz: f466b0e929feeee9042c3576994cfedcd53c00433f69860d503e5ab1d3593d5e5cd1bf44a2aae1512e7133006f63a3b9f02403e588c4cd7887c6429396bd4d2a
7
+ data.tar.gz: 3726f0b6bbcc902fc5512fbea2e63ad38b8ac00dfa726b1bdefa8a4cf7e94d88b406dfd7d2b929cb50f3156c6780dba7afe4ca6896dff572c04f5c987c4d0e6a
@@ -1,15 +1,16 @@
1
1
  require 'seiya/command'
2
2
  require 'optparse'
3
3
 
4
- module Contrib
5
- module Commands
6
- class Crawl < Seiya::Command
7
- def summary
8
- 'Crawl a task'
9
- end
4
+ module Seiya
5
+ module Contrib
6
+ module Commands
7
+ class Crawl < Seiya::Command
8
+ def summary
9
+ 'Crawl a task'
10
+ end
10
11
 
11
- def usage
12
- 'Usage
12
+ def usage
13
+ 'Usage
13
14
  =====
14
15
  seiya crawl <task_name> [options]
15
16
 
@@ -20,52 +21,53 @@ Options
20
21
  --help, -h show this help message and exit
21
22
  -a NAME=VALUE set task argument (may be repeated)
22
23
  --list, -l show task list in this project'
23
- end
24
+ end
24
25
 
25
- def print_task_list
26
- task_list = Seiya.tasks.map do |k, v|
27
- '%-14s%-30s' % [k, v]
28
- end.join("\n")
29
- puts "Task list
26
+ def print_task_list
27
+ task_list = Seiya.tasks.map do |k, v|
28
+ '%-14s%-30s' % [k, v]
29
+ end.join("\n")
30
+ puts "Task list
30
31
  =========
31
32
 
32
33
  #{task_list}
33
34
  "
34
- end
35
+ end
35
36
 
36
- def run(*args)
37
- options = {}
38
- OptionParser.new do |opts|
39
- opts.banner = 'Usage: seiya [options]'
37
+ def run(*args)
38
+ options = {}
39
+ OptionParser.new do |opts|
40
+ opts.banner = 'Usage: seiya [options]'
40
41
 
41
- opts.on '-aArg', '--argument=Arg', 'send argument to seiya task' do |a|
42
- options[:args] = [] unless options[:args]
43
- options[:args] << a
44
- end
42
+ opts.on '-aArg', '--argument=Arg', 'send argument to seiya task' do |a|
43
+ options[:args] = [] unless options[:args]
44
+ options[:args] << a
45
+ end
45
46
 
46
- opts.on '-l', '--list', 'list tasks' do |l|
47
- options[:list] = l
48
- end
49
- end.parse!
47
+ opts.on '-l', '--list', 'list tasks' do |l|
48
+ options[:list] = l
49
+ end
50
+ end.parse!
50
51
 
51
- if options[:list]
52
- print_task_list
53
- exit 0
54
- end
52
+ if options[:list]
53
+ print_task_list
54
+ exit 0
55
+ end
55
56
 
56
- task_name = args.shift
57
- if task_name.nil?
58
- puts 'Need a task_name'
59
- exit!
60
- end
61
- task_class = Seiya.get_task_class task_name
57
+ task_name = args.shift
58
+ if task_name.nil?
59
+ puts 'Need a task_name'
60
+ exit!
61
+ end
62
+ task_class = Seiya.get_task_class task_name
62
63
 
63
64
 
64
- _args = options[:args] ? options[:args] : []
65
+ _args = options[:args] ? options[:args] : []
65
66
 
66
- task = task_class.new *_args
67
+ task = task_class.new *_args
67
68
 
68
- task.run
69
+ task.run
70
+ end
69
71
  end
70
72
  end
71
73
  end
@@ -1,13 +1,15 @@
1
1
  require 'seiya/command'
2
- module Contrib
3
- module Commands
4
- class Create < Seiya::Command
5
- def summary
6
- 'Create a new project'
7
- end
8
2
 
9
- def usage
10
- 'Usage
3
+ module Seiya
4
+ module Contrib
5
+ module Commands
6
+ class Create < Seiya::Command
7
+ def summary
8
+ 'Create a new project'
9
+ end
10
+
11
+ def usage
12
+ 'Usage
11
13
  =====
12
14
  seiya create <project_name>
13
15
 
@@ -16,15 +18,16 @@ Create new project
16
18
  Options
17
19
  =======
18
20
  --help, -h show this help message and exit'
19
- end
21
+ end
20
22
 
21
- def run(*args)
22
- project_name = args.shift
23
- if project_name.nil?
24
- puts 'Need a project_name!'
25
- exit!
23
+ def run(*args)
24
+ project_name = args.shift
25
+ if project_name.nil?
26
+ puts 'Need a project_name!'
27
+ exit!
28
+ end
29
+ Seiya.gen_project_file project_name
26
30
  end
27
- Seiya.gen_project_file project_name
28
31
  end
29
32
  end
30
33
  end
@@ -1,13 +1,15 @@
1
1
  require 'seiya/command'
2
- module Contrib
3
- module Commands
4
- class Gentask < Seiya::Command
5
- def summary
6
- 'Generate new task using pre-defined templates'
7
- end
8
2
 
9
- def usage
10
- 'Usage
3
+ module Seiya
4
+ module Contrib
5
+ module Commands
6
+ class Gentask < Seiya::Command
7
+ def summary
8
+ 'Generate new task using pre-defined templates'
9
+ end
10
+
11
+ def usage
12
+ 'Usage
11
13
  =====
12
14
  seiya gentask [options] <name> <domain>
13
15
 
@@ -16,16 +18,17 @@ Generate new task using pre-defined templates
16
18
  Options
17
19
  =======
18
20
  --help, -h show this help message and exit'
19
- end
21
+ end
20
22
 
21
- def run(*args)
22
- task_name = args.shift
23
- task_domain = args.shift
24
- if task_name.nil?
25
- puts 'Need a task_name!'
26
- exit!
23
+ def run(*args)
24
+ task_name = args.shift
25
+ task_domain = args.shift
26
+ if task_name.nil?
27
+ puts 'Need a task_name!'
28
+ exit!
29
+ end
30
+ Seiya.gen_task_file task_name, task_domain
27
31
  end
28
- Seiya.gen_task_file task_name, task_domain
29
32
  end
30
33
  end
31
34
  end
@@ -0,0 +1,16 @@
1
+ require 'seiya/middleware'
2
+ require 'seiya/settings'
3
+
4
+ module Seiya
5
+ module Contrib
6
+ module RequestMiddlewares
7
+ class RandomUserAgentMiddleware < Seiya::RequestMiddleware
8
+ def process_request(request)
9
+ headers = request.headers
10
+ headers = {} unless headers.is_a? Hash
11
+ headers['User-Agent'] = Seiya::Settings::USER_AGENTS.sample
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -1,10 +1,12 @@
1
1
  require 'seiya/pipeline'
2
2
 
3
- module Contrib
4
- module Pipelines
5
- class BasePipeline < Seiya::Pipeline
6
- def process_item(item)
7
- item
3
+ module Seiya
4
+ module Contrib
5
+ module Pipelines
6
+ class BasePipeline < Seiya::Pipeline
7
+ def process_item(item)
8
+ item
9
+ end
8
10
  end
9
11
  end
10
12
  end
data/lib/seiya/contrib.rb CHANGED
@@ -1,2 +1,3 @@
1
1
  require 'seiya/contrib/pipelines'
2
2
  require 'seiya/contrib/commands'
3
+ require 'seiya/contrib/middlewares'
@@ -0,0 +1,7 @@
1
+ module Seiya
2
+ class RequestMiddleware
3
+ def process_request(request)
4
+ request
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,13 @@
1
+ module Seiya
2
+ def process_item(item)
3
+ @pipelines.each do |p|
4
+ item = p.process_item item
5
+ end
6
+ end
7
+
8
+ def process_request(request)
9
+ @request_middlewares.each do |rm|
10
+ rm.process_request request
11
+ end
12
+ end
13
+ end
data/lib/seiya/request.rb CHANGED
@@ -5,15 +5,20 @@ require 'seiya/response'
5
5
 
6
6
  module Seiya
7
7
  class Request
8
- def initialize(url, *args, method: 'get')
8
+ attr_reader :url
9
+ attr_accessor :params, :headers
10
+
11
+ def initialize(url, *args, params: {}, headers: {}, method: 'get')
9
12
  @url = url
10
13
  @method = method.upcase
11
14
  @args = args
15
+ @params = params
16
+ @headers = headers
12
17
  @httpclient = HTTPClient.new
13
18
  end
14
19
 
15
20
  def get_response
16
- Response.new @httpclient.send(@method.downcase, @url, *@args)
21
+ Response.new @httpclient.send(@method.downcase, @url, @params, @headers, *@args)
17
22
  end
18
23
 
19
24
  def register(&block)
@@ -5,7 +5,15 @@ module Seiya
5
5
  end
6
6
 
7
7
  def url
8
- @resp.http_header.request_uri.to_s
8
+ @resp.header.request_uri.to_s
9
+ end
10
+
11
+ def header
12
+ @resp.header
13
+ end
14
+
15
+ def headers
16
+ @resp.headers
9
17
  end
10
18
 
11
19
  def body
@@ -11,7 +11,9 @@ module Seiya
11
11
 
12
12
  def add_requests(requests)
13
13
  requests.each do |request|
14
- @request_q << request if request.registered?
14
+ next unless request.registered?
15
+ Seiya.process_request request
16
+ @request_q << request
15
17
  end
16
18
  run unless @run
17
19
  end
@@ -1,8 +1,34 @@
1
1
  module Seiya
2
2
  module Settings
3
3
  PIPELINES = {
4
- 'seiya/contrib|Contrib::Pipelines::BasePipeline' => 2
4
+ 'seiya/contrib|Seiya::Contrib::Pipelines::BasePipeline' => 0
5
5
  }
6
- COMMANDS = 'seiya/contrib|Contrib::Commands'
6
+ REQUEST_MIDDLEWARES = {
7
+ 'seiya/contrib|Seiya::Contrib::RequestMiddlewares::RandomUserAgentMiddleware' => 0
8
+ }
9
+ COMMANDS = 'seiya/contrib|Seiya::Contrib::Commands'
10
+ USER_AGENTS = [
11
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7',
12
+ 'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0',
13
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10',
14
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1',
15
+ 'Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11',
16
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6',
17
+ 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6',
18
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1',
19
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5',
20
+ 'Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5',
21
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3',
22
+ 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3',
23
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3',
24
+ 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3',
25
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3',
26
+ 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3',
27
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3',
28
+ 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3',
29
+ 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3',
30
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24',
31
+ 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24',
32
+ ]
7
33
  end
8
34
  end
data/lib/seiya/util.rb CHANGED
@@ -8,6 +8,21 @@ module Seiya
8
8
  end
9
9
  end
10
10
 
11
+ def argument_to_hash(args, *field)
12
+ return nil if args.empty?
13
+ if args.size == 1 and Hash === args[0]
14
+ h = args[0]
15
+ if field.any? { |f| h.key?(f) }
16
+ return h
17
+ end
18
+ end
19
+ h = {}
20
+ field.each_with_index do |e, idx|
21
+ h[e] = args[idx]
22
+ end
23
+ h
24
+ end
25
+
11
26
  def processors_in_use
12
27
  procs=[]
13
28
  Dir.glob('/proc/*/stat') do |filename|
data/lib/seiya/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Seiya
2
- VERSION = '0.0.7.5'
2
+ VERSION = '0.0.8'
3
3
  end
data/lib/seiya.rb CHANGED
@@ -1,22 +1,20 @@
1
1
  require 'fileutils'
2
+ require 'inifile'
3
+ require 'seiya/util'
2
4
  require 'seiya/version'
3
5
  require 'seiya/request'
4
6
  require 'seiya/task'
5
7
  require 'seiya/item'
6
8
  require 'seiya/pipeline'
9
+ require 'seiya/middleware'
7
10
  require 'seiya/settings'
8
11
  require 'seiya/command'
9
12
  require 'seiya/support'
13
+ require 'seiya/processer'
10
14
 
11
15
  module Seiya
12
16
  extend self
13
17
 
14
- def process_item(item)
15
- @pipelines.each do |p|
16
- item = p.process_item item
17
- end
18
- end
19
-
20
18
  def get_const!(require_str, const_str)
21
19
  begin
22
20
  require require_str
@@ -78,34 +76,52 @@ module Seiya
78
76
  $:.unshift load_path unless load_path.nil?
79
77
  end
80
78
 
79
+ def component_instance_variables(*variable_names)
80
+ variable_names.each do |variable_name|
81
+ variable_name = variable_name.to_s
82
+ const_name = variable_name.upcase
83
+ super_class = Seiya.const_get variable_name.sub(/s$/, '').camelize
84
+ vars = Settings.const_get const_name
85
+ begin
86
+ vars.merge! Util.get_const "#{@settings_const_str}::#{const_name}"
87
+ rescue NameError
88
+ # ignored
89
+ end
90
+
91
+ vars = {} unless vars.is_a? Hash
92
+
93
+ vars = vars.select do |_, v|
94
+ v >= 0
95
+ end.sort_by do |_, v|
96
+ v
97
+ end.to_h
98
+
99
+ vars = vars.keys.map do |k|
100
+ require_str, const_str = k.split '|'
101
+ klass = get_const require_str, const_str
102
+ klass.new
103
+ end.select do |p|
104
+ p.is_a? super_class
105
+ end
106
+
107
+ instance_variable_set '@' << variable_name, vars
108
+ end
109
+ end
110
+
81
111
  def setup(conf_file: 'seiya.ini')
82
112
  settings_const_str = ''
83
113
  if File.exist? conf_file
84
- load_path = File.dirname File.expand_path(conf_file)
85
- extend_load_path load_path
114
+ path = File.dirname File.expand_path(conf_file)
115
+ extend_load_path path
86
116
 
87
- require 'inifile'
88
- require 'seiya/util'
89
117
  conf = IniFile.load conf_file
90
118
  settings_file = conf.to_h.fetch('global', {}).fetch('settings', 'settings')
91
119
  settings_require_str, settings_const_str = settings_file.split '|'
120
+ @settings_const_str = settings_const_str
92
121
  require settings_require_str
93
122
  end
94
123
 
95
- pipelines = Settings::PIPELINES
96
- begin
97
- pipelines.merge! Util.get_const "#{settings_const_str}::PIPELINES"
98
- rescue NameError
99
- # ignored
100
- end
101
-
102
- pipelines = pipelines.sort_by { |_, v| v }.to_h
103
-
104
- @pipelines = pipelines.keys.map do |k|
105
- require_str, const_str = k.split '|'
106
- klass = get_const require_str, const_str
107
- klass.new
108
- end
124
+ component_instance_variables :pipelines, :request_middlewares
109
125
 
110
126
  commands = [Settings::COMMANDS]
111
127
  begin
@@ -0,0 +1,16 @@
1
+ require 'paint'
2
+ require 'seiya'
3
+
4
+ module RequestMiddlewares
5
+ class MyMiddleware < Seiya::RequestMiddleware
6
+ def process_request(request)
7
+ puts Paint['I am MyMiddleware!', :red]
8
+ puts Paint["User-Agent: #{request.headers['User-Agent']}", :random]
9
+ end
10
+ end
11
+ class MySubMiddleware < Seiya::RequestMiddleware
12
+ def process_request(request)
13
+ puts Paint['I am MySubMiddleware!', :green]
14
+ end
15
+ end
16
+ end
@@ -1,17 +1,18 @@
1
+ require 'paint'
1
2
  require 'seiya'
2
3
 
3
4
  module Pipelines
4
5
  class A < Seiya::Pipeline
5
6
  def process_item(item)
6
- p 'I am A Pipeline'
7
+ puts Paint['I am A Pipeline', :yellow]
7
8
  item[:pipeline] = 'A'
8
9
  item
9
10
  end
10
11
  end
11
12
  class B < Seiya::Pipeline
12
13
  def process_item(item)
13
- p 'I am B Pipeline'
14
- p item
14
+ puts Paint['I am B Pipeline', :blue]
15
+ puts item.to_json
15
16
  item
16
17
  end
17
18
  end
@@ -3,5 +3,9 @@ module Settings
3
3
  'aa/pipelines/t|Pipelines::A' => 1,
4
4
  'aa/pipelines/t|Pipelines::B' => 2,
5
5
  }
6
+ REQUEST_MIDDLEWARES = {
7
+ 'aa/middlewares|RequestMiddlewares::MyMiddleware' => 2,
8
+ 'aa/middlewares|RequestMiddlewares::MySubMiddleware' => 1,
9
+ }
6
10
  COMMANDS = 'aa/commands|Commands'
7
11
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: seiya
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7.5
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - yetone
@@ -26,9 +26,12 @@ files:
26
26
  - lib/seiya/contrib/commands/crawl.rb
27
27
  - lib/seiya/contrib/commands/create.rb
28
28
  - lib/seiya/contrib/commands/gentask.rb
29
+ - lib/seiya/contrib/middlewares.rb
29
30
  - lib/seiya/contrib/pipelines.rb
30
31
  - lib/seiya/item.rb
32
+ - lib/seiya/middleware.rb
31
33
  - lib/seiya/pipeline.rb
34
+ - lib/seiya/processer.rb
32
35
  - lib/seiya/request.rb
33
36
  - lib/seiya/response.rb
34
37
  - lib/seiya/scheduler.rb
@@ -40,6 +43,7 @@ files:
40
43
  - sample/test/aa/commands.rb
41
44
  - sample/test/aa/commands/sing.rb
42
45
  - sample/test/aa/items.rb
46
+ - sample/test/aa/middlewares.rb
43
47
  - sample/test/aa/pipelines/t.rb
44
48
  - sample/test/aa/settings.rb
45
49
  - sample/test/aa/tasks.rb