rails_spider 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +165 -0
  3. data/README.md +33 -0
  4. data/app/assets/config/the_spider_manifest.js +4 -0
  5. data/app/assets/javascripts/the_spider/application.js +1 -0
  6. data/app/assets/stylesheets/the_spider/application.css +4 -0
  7. data/app/controllers/the_spider/application_controller.rb +9 -0
  8. data/app/controllers/the_spider/locals_controller.rb +62 -0
  9. data/app/controllers/the_spider/works_controller.rb +60 -0
  10. data/app/helpers/the_spider/application_helper.rb +4 -0
  11. data/app/helpers/the_spider/locals_helper.rb +4 -0
  12. data/app/helpers/the_spider/works_helper.rb +4 -0
  13. data/app/jobs/the_spider/application_job.rb +4 -0
  14. data/app/jobs/the_spider/parser_job.rb +11 -0
  15. data/app/jobs/the_spider/work_job.rb +11 -0
  16. data/app/mailers/the_spider/application_mailer.rb +6 -0
  17. data/app/models/rails_spider/application_record.rb +5 -0
  18. data/app/models/rails_spider/cookie.rb +9 -0
  19. data/app/models/rails_spider/failed_url.rb +7 -0
  20. data/app/models/rails_spider/local.rb +14 -0
  21. data/app/models/rails_spider/work.rb +24 -0
  22. data/app/views/layouts/the_spider/application.html.erb +14 -0
  23. data/app/views/the_spider/locals/_form.html.erb +17 -0
  24. data/app/views/the_spider/locals/edit.html.erb +6 -0
  25. data/app/views/the_spider/locals/index.html.erb +25 -0
  26. data/app/views/the_spider/locals/new.html.erb +5 -0
  27. data/app/views/the_spider/locals/show.html.erb +4 -0
  28. data/app/views/the_spider/works/_form.html.erb +9 -0
  29. data/app/views/the_spider/works/edit.html.erb +6 -0
  30. data/app/views/the_spider/works/index.html.erb +44 -0
  31. data/app/views/the_spider/works/new.html.erb +5 -0
  32. data/app/views/the_spider/works/show.html.erb +4 -0
  33. data/config/routes.rb +8 -0
  34. data/config/schedule.rb +35 -0
  35. data/db/migrate/20170502153051_rails_spider_init.rb +38 -0
  36. data/lib/config/config.rb +27 -0
  37. data/lib/config/keywords.json +22 -0
  38. data/lib/config/proxy.json +10 -0
  39. data/lib/helper/helper.rb +6 -0
  40. data/lib/helper/location_helper.rb +46 -0
  41. data/lib/helper/price_helper.rb +23 -0
  42. data/lib/helper/tag_helper.rb +17 -0
  43. data/lib/helper/text_helper.rb +41 -0
  44. data/lib/helper/time_helper.rb +140 -0
  45. data/lib/logger.rb +146 -0
  46. data/lib/proxy/allproxylists.txt +2366 -0
  47. data/lib/proxy/proxy.rb +216 -0
  48. data/lib/proxy/proxylists.txt +625 -0
  49. data/lib/rails_spider.rb +10 -0
  50. data/lib/rails_spider/engine.rb +9 -0
  51. data/lib/rails_spider/fetchers.rb +2 -0
  52. data/lib/rails_spider/fetchers/base.rb +146 -0
  53. data/lib/rails_spider/fetchers/mechanize.rb +83 -0
  54. data/lib/rails_spider/fetchers/witar.rb +73 -0
  55. data/lib/rails_spider/parser.rb +14 -0
  56. data/lib/rails_spider/parser/szlawyers.rb +26 -0
  57. data/lib/rails_spider/resource.rb +58 -0
  58. data/lib/rails_spider/strategies.rb +6 -0
  59. data/lib/rails_spider/version.rb +3 -0
  60. data/lib/sync_qiniu.rb +35 -0
  61. data/lib/sync_qiniu/getimages.rb +98 -0
  62. data/lib/sync_qiniu/getimages_info.rb +37 -0
  63. data/lib/sync_qiniu/getlocation.rb +48 -0
  64. data/lib/sync_qiniu/getproxy.rb +95 -0
  65. data/lib/tasks/the_spider_tasks.rake +4 -0
  66. data/rakefile +284 -0
  67. metadata +165 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d9c9f8d63d2d08f317bf12639ca4a407650632c8060cfe86bdffbb29eb120b03
4
+ data.tar.gz: 5d83cf89db880b21255990b1c14b9eb545cfd4e92c1ab918df5a65eb6142219f
5
+ SHA512:
6
+ metadata.gz: 6be8dcf45f11b214314586b4035ddace9e41bb823d4c94a4d01797b62b0525b975df392560926d2bab30e392d1481e2cfec61d49db9f5938f38dca9c24564f17
7
+ data.tar.gz: 1058ed875f9ecd7cde8029af7b2811b619dfc77c01ac526ce2ce92ff2800d452fe42bf47663f5dda7e09bded946775e2d64295a8f0b36c7b5d6e901b59bd3ff4
data/LICENSE ADDED
@@ -0,0 +1,165 @@
1
+ GNU LESSER GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2018 Mingyuan Qin.
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+
9
+ This version of the GNU Lesser General Public License incorporates
10
+ the terms and conditions of version 3 of the GNU General Public
11
+ License, supplemented by the additional permissions listed below.
12
+
13
+ 0. Additional Definitions.
14
+
15
+ As used herein, "this License" refers to version 3 of the GNU Lesser
16
+ General Public License, and the "GNU GPL" refers to version 3 of the GNU
17
+ General Public License.
18
+
19
+ "The Library" refers to a covered work governed by this License,
20
+ other than an Application or a Combined Work as defined below.
21
+
22
+ An "Application" is any work that makes use of an interface provided
23
+ by the Library, but which is not otherwise based on the Library.
24
+ Defining a subclass of a class defined by the Library is deemed a mode
25
+ of using an interface provided by the Library.
26
+
27
+ A "Combined Work" is a work produced by combining or linking an
28
+ Application with the Library. The particular version of the Library
29
+ with which the Combined Work was made is also called the "Linked
30
+ Version".
31
+
32
+ The "Minimal Corresponding Source" for a Combined Work means the
33
+ Corresponding Source for the Combined Work, excluding any source code
34
+ for portions of the Combined Work that, considered in isolation, are
35
+ based on the Application, and not on the Linked Version.
36
+
37
+ The "Corresponding Application Code" for a Combined Work means the
38
+ object code and/or source code for the Application, including any data
39
+ and utility programs needed for reproducing the Combined Work from the
40
+ Application, but excluding the System Libraries of the Combined Work.
41
+
42
+ 1. Exception to Section 3 of the GNU GPL.
43
+
44
+ You may convey a covered work under sections 3 and 4 of this License
45
+ without being bound by section 3 of the GNU GPL.
46
+
47
+ 2. Conveying Modified Versions.
48
+
49
+ If you modify a copy of the Library, and, in your modifications, a
50
+ facility refers to a function or data to be supplied by an Application
51
+ that uses the facility (other than as an argument passed when the
52
+ facility is invoked), then you may convey a copy of the modified
53
+ version:
54
+
55
+ a) under this License, provided that you make a good faith effort to
56
+ ensure that, in the event an Application does not supply the
57
+ function or data, the facility still operates, and performs
58
+ whatever part of its purpose remains meaningful, or
59
+
60
+ b) under the GNU GPL, with none of the additional permissions of
61
+ this License applicable to that copy.
62
+
63
+ 3. Object Code Incorporating Material from Library Header Files.
64
+
65
+ The object code form of an Application may incorporate material from
66
+ a header file that is part of the Library. You may convey such object
67
+ code under terms of your choice, provided that, if the incorporated
68
+ material is not limited to numerical parameters, data structure
69
+ layouts and accessors, or small macros, inline functions and templates
70
+ (ten or fewer lines in length), you do both of the following:
71
+
72
+ a) Give prominent notice with each copy of the object code that the
73
+ Library is used in it and that the Library and its use are
74
+ covered by this License.
75
+
76
+ b) Accompany the object code with a copy of the GNU GPL and this license
77
+ document.
78
+
79
+ 4. Combined Works.
80
+
81
+ You may convey a Combined Work under terms of your choice that,
82
+ taken together, effectively do not restrict modification of the
83
+ portions of the Library contained in the Combined Work and reverse
84
+ engineering for debugging such modifications, if you also do each of
85
+ the following:
86
+
87
+ a) Give prominent notice with each copy of the Combined Work that
88
+ the Library is used in it and that the Library and its use are
89
+ covered by this License.
90
+
91
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
92
+ document.
93
+
94
+ c) For a Combined Work that displays copyright notices during
95
+ execution, include the copyright notice for the Library among
96
+ these notices, as well as a reference directing the user to the
97
+ copies of the GNU GPL and this license document.
98
+
99
+ d) Do one of the following:
100
+
101
+ 0) Convey the Minimal Corresponding Source under the terms of this
102
+ License, and the Corresponding Application Code in a form
103
+ suitable for, and under terms that permit, the user to
104
+ recombine or relink the Application with a modified version of
105
+ the Linked Version to produce a modified Combined Work, in the
106
+ manner specified by section 6 of the GNU GPL for conveying
107
+ Corresponding Source.
108
+
109
+ 1) Use a suitable shared library mechanism for linking with the
110
+ Library. A suitable mechanism is one that (a) uses at run time
111
+ a copy of the Library already present on the user's computer
112
+ system, and (b) will operate properly with a modified version
113
+ of the Library that is interface-compatible with the Linked
114
+ Version.
115
+
116
+ e) Provide Installation Information, but only if you would otherwise
117
+ be required to provide such information under section 6 of the
118
+ GNU GPL, and only to the extent that such information is
119
+ necessary to install and execute a modified version of the
120
+ Combined Work produced by recombining or relinking the
121
+ Application with a modified version of the Linked Version. (If
122
+ you use option 4d0, the Installation Information must accompany
123
+ the Minimal Corresponding Source and Corresponding Application
124
+ Code. If you use option 4d1, you must provide the Installation
125
+ Information in the manner specified by section 6 of the GNU GPL
126
+ for conveying Corresponding Source.)
127
+
128
+ 5. Combined Libraries.
129
+
130
+ You may place library facilities that are a work based on the
131
+ Library side by side in a single library together with other library
132
+ facilities that are not Applications and are not covered by this
133
+ License, and convey such a combined library under terms of your
134
+ choice, if you do both of the following:
135
+
136
+ a) Accompany the combined library with a copy of the same work based
137
+ on the Library, uncombined with any other library facilities,
138
+ conveyed under the terms of this License.
139
+
140
+ b) Give prominent notice with the combined library that part of it
141
+ is a work based on the Library, and explaining where to find the
142
+ accompanying uncombined form of the same work.
143
+
144
+ 6. Revised Versions of the GNU Lesser General Public License.
145
+
146
+ The Free Software Foundation may publish revised and/or new versions
147
+ of the GNU Lesser General Public License from time to time. Such new
148
+ versions will be similar in spirit to the present version, but may
149
+ differ in detail to address new problems or concerns.
150
+
151
+ Each version is given a distinguishing version number. If the
152
+ Library as you received it specifies that a certain numbered version
153
+ of the GNU Lesser General Public License "or any later version"
154
+ applies to it, you have the option of following the terms and
155
+ conditions either of that published version or of any later version
156
+ published by the Free Software Foundation. If the Library as you
157
+ received it does not specify a version number of the GNU Lesser
158
+ General Public License, you may choose any version of the GNU Lesser
159
+ General Public License ever published by the Free Software Foundation.
160
+
161
+ If the Library as you received it specifies that a proxy can decide
162
+ whether future versions of the GNU Lesser General Public License shall
163
+ apply, that proxy's public statement of acceptance of any version is
164
+ permanent authorization for you to choose that version for the
165
+ Library.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # RailsSpider
2
+ Short description and motivation.
3
+
4
+ ## Usage
5
+ How to use my plugin.
6
+ 日志在development.log,错误日志在error.log中
7
+
8
+ 代理IP存放在文件proxy/proxylists.txt中,如要更新代理IP,执行rake proxy:proxy_youdaili,会更新proxy/proxylists.txt
9
+
10
+
11
+
12
+ ## Installation
13
+ Add this line to your application's Gemfile:
14
+
15
+ ```ruby
16
+ gem 'rails_spider'
17
+ ```
18
+
19
+ And then execute:
20
+ ```bash
21
+ $ bundle
22
+ ```
23
+
24
+ Or install it yourself as:
25
+ ```bash
26
+ $ gem install rails_spider
27
+ ```
28
+
29
+ ## Contributing
30
+ Contribution directions go here.
31
+
32
+ ## License
33
+ The gem is available as open source under the terms of the [GNU Lesser General Public License v3.0](https://www.gnu.org/licenses/lgpl-3.0.html); see the bundled LICENSE file.
@@ -0,0 +1,4 @@
1
+ //= link_directory ../javascripts/rails_spider .js
2
+ //= link_directory ../stylesheets/rails_spider .css
3
+ //= link rails_spider/application.js
4
+ //= link rails_spider/application.css
@@ -0,0 +1 @@
1
+ //= require rails-ujs
@@ -0,0 +1,4 @@
1
+ /*
2
+ *= require semantic
3
+ *= require_self
4
+ */
@@ -0,0 +1,9 @@
1
+ module RailsSpider
2
+ class ApplicationController < ActionController::Base # Shared base controller; LocalsController and WorksController inherit from it.
3
+ protect_from_forgery with: :exception # A failed CSRF token check raises ActionController::InvalidAuthenticityToken.
4
+ default_form_builder 'RailsSpiderBuilder' do |config| # NOTE(review): String name plus an empty block; 'RailsSpiderBuilder' is not defined in the visible source - confirm what provides it.
5
+
6
+ end
7
+
8
+ end
9
+ end
@@ -0,0 +1,62 @@
1
+ require_dependency "rails_spider/application_controller"
2
+
3
+ module RailsSpider
4
+ class LocalsController < ApplicationController # CRUD actions for Local records. NOTE(review): the file listing places this under controllers/the_spider/ while the namespace is RailsSpider - confirm autoloading resolves.
5
+ before_action :set_local, only: [:show, :edit, :update, :destroy] # These four actions need @local loaded from params[:id].
6
+
7
+ # GET /locals - assigns one page of Local records to @locals.
8
+ def index
9
+ @locals = Local.page(params[:page]) # `page` is presumably a pagination scope (e.g. Kaminari) - TODO confirm.
10
+ end
11
+
12
+ # GET /locals/1 - @local is set by the before_action.
13
+ def show
14
+ end
15
+
16
+ # GET /locals/new - builds an unsaved Local for the form.
17
+ def new
18
+ @local = Local.new
19
+ end
20
+
21
+ # GET /locals/1/edit - @local is set by the before_action.
22
+ def edit
23
+ end
24
+
25
+ # POST /locals - builds a Local from local_params; redirects on save, re-renders :new otherwise.
26
+ def create
27
+ @local = Local.new(local_params)
28
+
29
+ if @local.save
30
+ redirect_to @local, notice: 'Local was successfully created.'
31
+ else
32
+ render :new
33
+ end
34
+ end
35
+
36
+ # PATCH/PUT /locals/1 - updates @local from local_params; redirects on success, re-renders :edit otherwise.
37
+ def update
38
+ if @local.update(local_params)
39
+ redirect_to @local, notice: 'Local was successfully updated.'
40
+ else
41
+ render :edit
42
+ end
43
+ end
44
+
45
+ # DELETE /locals/1 - destroys @local and redirects to the index.
46
+ def destroy
47
+ @local.destroy
48
+ redirect_to locals_url, notice: 'Local was successfully destroyed.'
49
+ end
50
+
51
+ private
52
+ # Loads the record named by params[:id] into @local for the member actions.
53
+ def set_local
54
+ @local = Local.find(params[:id])
55
+ end
56
+
57
+ # Returns params[:local], or an empty default when the key is absent. NOTE(review): there is no .permit call, so nothing is actually whitelisted; unpermitted parameters are expected to be rejected on mass assignment - add .permit(...) with Local's attributes.
58
+ def local_params
59
+ params.fetch(:local, {})
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,60 @@
1
+ require_dependency 'rails_spider/application_controller'
2
+ module RailsSpider
3
+ class WorksController < ApplicationController
4
+ before_action :set_work, only: [:show, :edit, :update, :run, :destroy]
5
+
6
+ def index
7
+ @works = Work.page(params[:page])
8
+ end
9
+
10
+ def show
11
+ end
12
+
13
+ def new
14
+ @work = Work.new
15
+ end
16
+
17
+ def create
18
+ @work = Work.new(work_params)
19
+
20
+ if @work.save
21
+ redirect_to @work, notice: 'Work was successfully created.'
22
+ else
23
+ render :new
24
+ end
25
+ end
26
+
27
+ def edit
28
+ end
29
+
30
+ def update
31
+ if @work.update(work_params)
32
+ redirect_to @work, notice: 'Work was successfully updated.'
33
+ else
34
+ render :edit
35
+ end
36
+ end
37
+
38
+ def run
39
+ WorkJob.perform_later(@work.id)
40
+ end
41
+
42
+ def parser
43
+ ParserJob.perform_later(@work.id)
44
+ end
45
+
46
+ def destroy
47
+ @work.destroy
48
+ redirect_to works_url, notice: 'Work was successfully destroyed.'
49
+ end
50
+
51
+ private
52
+ def set_work
53
+ @work = Work.find(params[:id])
54
+ end
55
+
56
+ def work_params
57
+ params.fetch(:work, {}).permit(:name, :parser_name, :host, :list_path, :item_path, :page_params)
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,4 @@
1
+ module RailsSpider
2
+ module ApplicationHelper # Empty helper module; no helper methods are defined.
3
+ end
4
+ end
@@ -0,0 +1,4 @@
1
+ module RailsSpider
2
+ module LocalsHelper # Empty helper module; no helper methods are defined.
3
+ end
4
+ end
@@ -0,0 +1,4 @@
1
+ module RailsSpider
2
+ module WorksHelper # Empty helper module; no helper methods are defined.
3
+ end
4
+ end
@@ -0,0 +1,4 @@
1
+ module RailsSpider
2
+ class ApplicationJob < ActiveJob::Base # Base job class; ParserJob and WorkJob inherit from it. Adds no configuration of its own.
3
+ end
4
+ end
@@ -0,0 +1,11 @@
1
+ module RailsSpider
2
+ class ParserJob < ApplicationJob # Background job that runs the parse step for one Work.
3
+ queue_as :default
4
+
5
+ def perform(work_id) # work_id: id of the Work to parse (the controller enqueues it with @work.id).
6
+ work = Work.find work_id # Takes an id, not the record, so the Work is looked up again when the job runs.
7
+ work.parse # Work#parse is defined outside the visible source.
8
+ end
9
+
10
+ end
11
+ end
@@ -0,0 +1,11 @@
1
+ module RailsSpider
2
+ class WorkJob < ApplicationJob # Background job that runs the resource attached to one Work.
3
+ queue_as :default
4
+
5
+ def perform(work_id) # work_id: id of the Work to run (the controller enqueues it with @work.id).
6
+ work = Work.find work_id # Takes an id, not the record, so the Work is looked up again when the job runs.
7
+ work.resource.run # Work#resource and its #run are defined outside the visible source.
8
+ end
9
+
10
+ end
11
+ end
@@ -0,0 +1,6 @@
1
+ module RailsSpider
2
+ class ApplicationMailer < ActionMailer::Base # Base mailer; no mailer subclasses are visible in this source.
3
+ default from: 'from@example.com' # Placeholder sender address.
4
+ layout 'mailer' # NOTE(review): no 'mailer' layout appears in the file listing - confirm it exists before sending mail.
5
+ end
6
+ end
@@ -0,0 +1,5 @@
1
+ module RailsSpider
2
+ class ApplicationRecord < ActiveRecord::Base # Base model class; Cookie and FailedUrl inherit from it.
3
+ self.abstract_class = true # Marks this class as abstract so it is not mapped to a table of its own.
4
+ end
5
+ end
@@ -0,0 +1,9 @@
1
+ module RailsSpider
2
+ class Cookie < ApplicationRecord # Model declaring name, password, domain and value attributes.
3
+ attribute :name, type: String # NOTE(review): the ActiveRecord Attributes API takes the cast type as a second positional argument (attribute :name, :string) - confirm `type: String` is honored here.
4
+ attribute :password, type: String # NOTE(review): declared like the other string fields - confirm whether it is stored unencrypted.
5
+ attribute :domain, type: String
6
+ attribute :value, type: String
7
+
8
+ end
9
+ end
@@ -0,0 +1,7 @@
1
+ module RailsSpider
2
+ class FailedUrl < ApplicationRecord # Model declaring url, source and flag attributes.
3
+ attribute :url, type: String # NOTE(review): the ActiveRecord Attributes API takes the cast type as a second positional argument (attribute :url, :string) - confirm `type: String` is honored here.
4
+ attribute :source, type: String
5
+ attribute :flag, type: String
6
+ end
7
+ end