rails_spider 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +165 -0
- data/README.md +33 -0
- data/app/assets/config/the_spider_manifest.js +4 -0
- data/app/assets/javascripts/the_spider/application.js +1 -0
- data/app/assets/stylesheets/the_spider/application.css +4 -0
- data/app/controllers/the_spider/application_controller.rb +9 -0
- data/app/controllers/the_spider/locals_controller.rb +62 -0
- data/app/controllers/the_spider/works_controller.rb +60 -0
- data/app/helpers/the_spider/application_helper.rb +4 -0
- data/app/helpers/the_spider/locals_helper.rb +4 -0
- data/app/helpers/the_spider/works_helper.rb +4 -0
- data/app/jobs/the_spider/application_job.rb +4 -0
- data/app/jobs/the_spider/parser_job.rb +11 -0
- data/app/jobs/the_spider/work_job.rb +11 -0
- data/app/mailers/the_spider/application_mailer.rb +6 -0
- data/app/models/rails_spider/application_record.rb +5 -0
- data/app/models/rails_spider/cookie.rb +9 -0
- data/app/models/rails_spider/failed_url.rb +7 -0
- data/app/models/rails_spider/local.rb +14 -0
- data/app/models/rails_spider/work.rb +24 -0
- data/app/views/layouts/the_spider/application.html.erb +14 -0
- data/app/views/the_spider/locals/_form.html.erb +17 -0
- data/app/views/the_spider/locals/edit.html.erb +6 -0
- data/app/views/the_spider/locals/index.html.erb +25 -0
- data/app/views/the_spider/locals/new.html.erb +5 -0
- data/app/views/the_spider/locals/show.html.erb +4 -0
- data/app/views/the_spider/works/_form.html.erb +9 -0
- data/app/views/the_spider/works/edit.html.erb +6 -0
- data/app/views/the_spider/works/index.html.erb +44 -0
- data/app/views/the_spider/works/new.html.erb +5 -0
- data/app/views/the_spider/works/show.html.erb +4 -0
- data/config/routes.rb +8 -0
- data/config/schedule.rb +35 -0
- data/db/migrate/20170502153051_rails_spider_init.rb +38 -0
- data/lib/config/config.rb +27 -0
- data/lib/config/keywords.json +22 -0
- data/lib/config/proxy.json +10 -0
- data/lib/helper/helper.rb +6 -0
- data/lib/helper/location_helper.rb +46 -0
- data/lib/helper/price_helper.rb +23 -0
- data/lib/helper/tag_helper.rb +17 -0
- data/lib/helper/text_helper.rb +41 -0
- data/lib/helper/time_helper.rb +140 -0
- data/lib/logger.rb +146 -0
- data/lib/proxy/allproxylists.txt +2366 -0
- data/lib/proxy/proxy.rb +216 -0
- data/lib/proxy/proxylists.txt +625 -0
- data/lib/rails_spider.rb +10 -0
- data/lib/rails_spider/engine.rb +9 -0
- data/lib/rails_spider/fetchers.rb +2 -0
- data/lib/rails_spider/fetchers/base.rb +146 -0
- data/lib/rails_spider/fetchers/mechanize.rb +83 -0
- data/lib/rails_spider/fetchers/witar.rb +73 -0
- data/lib/rails_spider/parser.rb +14 -0
- data/lib/rails_spider/parser/szlawyers.rb +26 -0
- data/lib/rails_spider/resource.rb +58 -0
- data/lib/rails_spider/strategies.rb +6 -0
- data/lib/rails_spider/version.rb +3 -0
- data/lib/sync_qiniu.rb +35 -0
- data/lib/sync_qiniu/getimages.rb +98 -0
- data/lib/sync_qiniu/getimages_info.rb +37 -0
- data/lib/sync_qiniu/getlocation.rb +48 -0
- data/lib/sync_qiniu/getproxy.rb +95 -0
- data/lib/tasks/the_spider_tasks.rake +4 -0
- data/rakefile +284 -0
- metadata +165 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d9c9f8d63d2d08f317bf12639ca4a407650632c8060cfe86bdffbb29eb120b03
|
4
|
+
data.tar.gz: 5d83cf89db880b21255990b1c14b9eb545cfd4e92c1ab918df5a65eb6142219f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6be8dcf45f11b214314586b4035ddace9e41bb823d4c94a4d01797b62b0525b975df392560926d2bab30e392d1481e2cfec61d49db9f5938f38dca9c24564f17
|
7
|
+
data.tar.gz: 1058ed875f9ecd7cde8029af7b2811b619dfc77c01ac526ce2ce92ff2800d452fe42bf47663f5dda7e09bded946775e2d64295a8f0b36c7b5d6e901b59bd3ff4
|
data/LICENSE
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
GNU LESSER GENERAL PUBLIC LICENSE
|
2
|
+
Version 3, 29 June 2007
|
3
|
+
|
4
|
+
Copyright (C) 2018 Mingyuan Qin.
|
5
|
+
Everyone is permitted to copy and distribute verbatim copies
|
6
|
+
of this license document, but changing it is not allowed.
|
7
|
+
|
8
|
+
|
9
|
+
This version of the GNU Lesser General Public License incorporates
|
10
|
+
the terms and conditions of version 3 of the GNU General Public
|
11
|
+
License, supplemented by the additional permissions listed below.
|
12
|
+
|
13
|
+
0. Additional Definitions.
|
14
|
+
|
15
|
+
As used herein, "this License" refers to version 3 of the GNU Lesser
|
16
|
+
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
17
|
+
General Public License.
|
18
|
+
|
19
|
+
"The Library" refers to a covered work governed by this License,
|
20
|
+
other than an Application or a Combined Work as defined below.
|
21
|
+
|
22
|
+
An "Application" is any work that makes use of an interface provided
|
23
|
+
by the Library, but which is not otherwise based on the Library.
|
24
|
+
Defining a subclass of a class defined by the Library is deemed a mode
|
25
|
+
of using an interface provided by the Library.
|
26
|
+
|
27
|
+
A "Combined Work" is a work produced by combining or linking an
|
28
|
+
Application with the Library. The particular version of the Library
|
29
|
+
with which the Combined Work was made is also called the "Linked
|
30
|
+
Version".
|
31
|
+
|
32
|
+
The "Minimal Corresponding Source" for a Combined Work means the
|
33
|
+
Corresponding Source for the Combined Work, excluding any source code
|
34
|
+
for portions of the Combined Work that, considered in isolation, are
|
35
|
+
based on the Application, and not on the Linked Version.
|
36
|
+
|
37
|
+
The "Corresponding Application Code" for a Combined Work means the
|
38
|
+
object code and/or source code for the Application, including any data
|
39
|
+
and utility programs needed for reproducing the Combined Work from the
|
40
|
+
Application, but excluding the System Libraries of the Combined Work.
|
41
|
+
|
42
|
+
1. Exception to Section 3 of the GNU GPL.
|
43
|
+
|
44
|
+
You may convey a covered work under sections 3 and 4 of this License
|
45
|
+
without being bound by section 3 of the GNU GPL.
|
46
|
+
|
47
|
+
2. Conveying Modified Versions.
|
48
|
+
|
49
|
+
If you modify a copy of the Library, and, in your modifications, a
|
50
|
+
facility refers to a function or data to be supplied by an Application
|
51
|
+
that uses the facility (other than as an argument passed when the
|
52
|
+
facility is invoked), then you may convey a copy of the modified
|
53
|
+
version:
|
54
|
+
|
55
|
+
a) under this License, provided that you make a good faith effort to
|
56
|
+
ensure that, in the event an Application does not supply the
|
57
|
+
function or data, the facility still operates, and performs
|
58
|
+
whatever part of its purpose remains meaningful, or
|
59
|
+
|
60
|
+
b) under the GNU GPL, with none of the additional permissions of
|
61
|
+
this License applicable to that copy.
|
62
|
+
|
63
|
+
3. Object Code Incorporating Material from Library Header Files.
|
64
|
+
|
65
|
+
The object code form of an Application may incorporate material from
|
66
|
+
a header file that is part of the Library. You may convey such object
|
67
|
+
code under terms of your choice, provided that, if the incorporated
|
68
|
+
material is not limited to numerical parameters, data structure
|
69
|
+
layouts and accessors, or small macros, inline functions and templates
|
70
|
+
(ten or fewer lines in length), you do both of the following:
|
71
|
+
|
72
|
+
a) Give prominent notice with each copy of the object code that the
|
73
|
+
Library is used in it and that the Library and its use are
|
74
|
+
covered by this License.
|
75
|
+
|
76
|
+
b) Accompany the object code with a copy of the GNU GPL and this license
|
77
|
+
document.
|
78
|
+
|
79
|
+
4. Combined Works.
|
80
|
+
|
81
|
+
You may convey a Combined Work under terms of your choice that,
|
82
|
+
taken together, effectively do not restrict modification of the
|
83
|
+
portions of the Library contained in the Combined Work and reverse
|
84
|
+
engineering for debugging such modifications, if you also do each of
|
85
|
+
the following:
|
86
|
+
|
87
|
+
a) Give prominent notice with each copy of the Combined Work that
|
88
|
+
the Library is used in it and that the Library and its use are
|
89
|
+
covered by this License.
|
90
|
+
|
91
|
+
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
92
|
+
document.
|
93
|
+
|
94
|
+
c) For a Combined Work that displays copyright notices during
|
95
|
+
execution, include the copyright notice for the Library among
|
96
|
+
these notices, as well as a reference directing the user to the
|
97
|
+
copies of the GNU GPL and this license document.
|
98
|
+
|
99
|
+
d) Do one of the following:
|
100
|
+
|
101
|
+
0) Convey the Minimal Corresponding Source under the terms of this
|
102
|
+
License, and the Corresponding Application Code in a form
|
103
|
+
suitable for, and under terms that permit, the user to
|
104
|
+
recombine or relink the Application with a modified version of
|
105
|
+
the Linked Version to produce a modified Combined Work, in the
|
106
|
+
manner specified by section 6 of the GNU GPL for conveying
|
107
|
+
Corresponding Source.
|
108
|
+
|
109
|
+
1) Use a suitable shared library mechanism for linking with the
|
110
|
+
Library. A suitable mechanism is one that (a) uses at run time
|
111
|
+
a copy of the Library already present on the user's computer
|
112
|
+
system, and (b) will operate properly with a modified version
|
113
|
+
of the Library that is interface-compatible with the Linked
|
114
|
+
Version.
|
115
|
+
|
116
|
+
e) Provide Installation Information, but only if you would otherwise
|
117
|
+
be required to provide such information under section 6 of the
|
118
|
+
GNU GPL, and only to the extent that such information is
|
119
|
+
necessary to install and execute a modified version of the
|
120
|
+
Combined Work produced by recombining or relinking the
|
121
|
+
Application with a modified version of the Linked Version. (If
|
122
|
+
you use option 4d0, the Installation Information must accompany
|
123
|
+
the Minimal Corresponding Source and Corresponding Application
|
124
|
+
Code. If you use option 4d1, you must provide the Installation
|
125
|
+
Information in the manner specified by section 6 of the GNU GPL
|
126
|
+
for conveying Corresponding Source.)
|
127
|
+
|
128
|
+
5. Combined Libraries.
|
129
|
+
|
130
|
+
You may place library facilities that are a work based on the
|
131
|
+
Library side by side in a single library together with other library
|
132
|
+
facilities that are not Applications and are not covered by this
|
133
|
+
License, and convey such a combined library under terms of your
|
134
|
+
choice, if you do both of the following:
|
135
|
+
|
136
|
+
a) Accompany the combined library with a copy of the same work based
|
137
|
+
on the Library, uncombined with any other library facilities,
|
138
|
+
conveyed under the terms of this License.
|
139
|
+
|
140
|
+
b) Give prominent notice with the combined library that part of it
|
141
|
+
is a work based on the Library, and explaining where to find the
|
142
|
+
accompanying uncombined form of the same work.
|
143
|
+
|
144
|
+
6. Revised Versions of the GNU Lesser General Public License.
|
145
|
+
|
146
|
+
The Free Software Foundation may publish revised and/or new versions
|
147
|
+
of the GNU Lesser General Public License from time to time. Such new
|
148
|
+
versions will be similar in spirit to the present version, but may
|
149
|
+
differ in detail to address new problems or concerns.
|
150
|
+
|
151
|
+
Each version is given a distinguishing version number. If the
|
152
|
+
Library as you received it specifies that a certain numbered version
|
153
|
+
of the GNU Lesser General Public License "or any later version"
|
154
|
+
applies to it, you have the option of following the terms and
|
155
|
+
conditions either of that published version or of any later version
|
156
|
+
published by the Free Software Foundation. If the Library as you
|
157
|
+
received it does not specify a version number of the GNU Lesser
|
158
|
+
General Public License, you may choose any version of the GNU Lesser
|
159
|
+
General Public License ever published by the Free Software Foundation.
|
160
|
+
|
161
|
+
If the Library as you received it specifies that a proxy can decide
|
162
|
+
whether future versions of the GNU Lesser General Public License shall
|
163
|
+
apply, that proxy's public statement of acceptance of any version is
|
164
|
+
permanent authorization for you to choose that version for the
|
165
|
+
Library.
|
data/README.md
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# RailsSpider
|
2
|
+
Short description and motivation.
|
3
|
+
|
4
|
+
## Usage
|
5
|
+
How to use my plugin.
|
6
|
+
日志在development.log,错误日志在error.log中
|
7
|
+
|
8
|
+
代理IP保存在文件proxy/proxylists.txt中;如要更新代理IP,执行rake proxy:proxy_youdaili,会更新proxy/proxylists.txt
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'rails_spider'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
```bash
|
21
|
+
$ bundle
|
22
|
+
```
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
```bash
|
26
|
+
$ gem install rails_spider
|
27
|
+
```
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
Contribution directions go here.
|
31
|
+
|
32
|
+
## License
|
33
|
+
The gem is available as open source under the terms of the [GNU Lesser General Public License v3.0](https://www.gnu.org/licenses/lgpl-3.0.html); see the bundled LICENSE file.
|
@@ -0,0 +1 @@
|
|
1
|
+
//= require rails-ujs
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require_dependency "rails_spider/application_controller"
|
2
|
+
|
3
|
+
module RailsSpider
  # Scaffold-style CRUD controller for Local records.
  class LocalsController < ApplicationController
    # Member actions operate on a record looked up from params[:id].
    before_action :set_local, only: %i[show edit update destroy]

    # GET /locals
    # Lists Local records, using params[:page] to pick the page.
    def index
      @locals = Local.page(params[:page])
    end

    # GET /locals/1
    # @local is assigned by the set_local callback.
    def show; end

    # GET /locals/new
    # Builds an unsaved Local for the form.
    def new
      @local = Local.new
    end

    # GET /locals/1/edit
    # @local is assigned by the set_local callback.
    def edit; end

    # POST /locals
    # Saves a new Local; re-renders the form when the save fails.
    def create
      @local = Local.new(local_params)
      return render(:new) unless @local.save

      redirect_to @local, notice: 'Local was successfully created.'
    end

    # PATCH/PUT /locals/1
    # Updates the loaded Local; re-renders the edit form when the update fails.
    def update
      return render(:edit) unless @local.update(local_params)

      redirect_to @local, notice: 'Local was successfully updated.'
    end

    # DELETE /locals/1
    # Destroys the loaded Local and returns to the index.
    def destroy
      @local.destroy
      redirect_to locals_url, notice: 'Local was successfully destroyed.'
    end

    private

    # Loads the record addressed by params[:id] into @local.
    def set_local
      @local = Local.find(params[:id])
    end

    # Returns the :local sub-hash of the request params, or {} when absent.
    # NOTE(review): no attributes are permitted here, so a non-empty :local
    # payload is presumably rejected by strong parameters on create/update --
    # confirm the intended attribute list against the Local model and form.
    def local_params
      params.fetch(:local, {})
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require_dependency 'rails_spider/application_controller'
|
2
|
+
module RailsSpider
  # CRUD controller for Work records, plus two member actions (run, parser)
  # that enqueue background jobs for a single Work.
  class WorksController < ApplicationController
    # Every action that reads @work must be listed here. :parser was missing,
    # which left @work nil and made `@work.id` raise NoMethodError.
    before_action :set_work, only: [:show, :edit, :update, :run, :parser, :destroy]

    # Lists Work records, using params[:page] to pick the page.
    def index
      @works = Work.page(params[:page])
    end

    # @work is assigned by the set_work callback.
    def show
    end

    # Builds an unsaved Work for the form.
    def new
      @work = Work.new
    end

    # Saves a new Work from the permitted params; re-renders the form on failure.
    def create
      @work = Work.new(work_params)

      if @work.save
        redirect_to @work, notice: 'Work was successfully created.'
      else
        render :new
      end
    end

    # @work is assigned by the set_work callback.
    def edit
    end

    # Updates the loaded Work; re-renders the edit form on failure.
    def update
      if @work.update(work_params)
        redirect_to @work, notice: 'Work was successfully updated.'
      else
        render :edit
      end
    end

    # Enqueues WorkJob with the id of the loaded Work.
    def run
      WorkJob.perform_later(@work.id)
    end

    # Enqueues ParserJob with the id of the loaded Work.
    def parser
      ParserJob.perform_later(@work.id)
    end

    # Destroys the loaded Work and returns to the index.
    def destroy
      @work.destroy
      redirect_to works_url, notice: 'Work was successfully destroyed.'
    end

    private

    # Loads the record addressed by params[:id] into @work.
    def set_work
      @work = Work.find(params[:id])
    end

    # Whitelists the Work attributes accepted from the request; a missing
    # :work key yields an empty parameter set rather than an error.
    def work_params
      params.fetch(:work, {}).permit(:name, :parser_name, :host, :list_path, :item_path, :page_params)
    end
  end
end
|