scrappy 0.4.5 → 0.4.6
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/lib/scrappy.rb +1 -1
- data/lib/scrappy/server/admin.rb +1 -1
- data/lib/scrappy/server/server.rb +3 -3
- data/scrappy.gemspec +10 -12
- metadata +106 -149
data/History.txt
CHANGED
data/lib/scrappy.rb
CHANGED
data/lib/scrappy/server/admin.rb
CHANGED
@@ -18,7 +18,7 @@ module Scrappy
|
|
18
18
|
app.get '/javascript' do
|
19
19
|
fragments = agent.fragments_for(Scrappy::Kb.extractors, params[:uri])
|
20
20
|
content_type 'application/javascript'
|
21
|
-
"window.scrappy_extractor=#{fragments.any?};" + open("#{settings.
|
21
|
+
"window.scrappy_extractor=#{fragments.any?};" + open("#{settings.public_folder}/javascripts/annotator.js").read
|
22
22
|
end
|
23
23
|
|
24
24
|
app.get '/help' do
|
@@ -12,9 +12,9 @@ module Scrappy
|
|
12
12
|
register Admin if Scrappy::Options.admin
|
13
13
|
|
14
14
|
enable :sessions
|
15
|
-
set :root,
|
16
|
-
set :views,
|
17
|
-
set :
|
15
|
+
set :root, File.join(File.dirname(__FILE__), '..', '..', '..')
|
16
|
+
set :views, Proc.new { File.join(root, "views") }
|
17
|
+
set :public_folder, Proc.new { File.join(root, "public") }
|
18
18
|
|
19
19
|
get '/:format/*' do |format, url|
|
20
20
|
process_request :get, format, url, params[:callback]
|
data/scrappy.gemspec
CHANGED
@@ -1,31 +1,29 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
|
-
s.name =
|
5
|
-
s.version = "0.4.
|
4
|
+
s.name = "scrappy"
|
5
|
+
s.version = "0.4.6"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Jose Ignacio"]
|
9
|
-
s.date =
|
10
|
-
s.
|
11
|
-
s.
|
12
|
-
s.email = %q{joseignacio.fernandez@gmail.com}
|
9
|
+
s.date = "2011-10-05"
|
10
|
+
s.description = "RDF web scraper"
|
11
|
+
s.email = "joseignacio.fernandez@gmail.com"
|
13
12
|
s.executables = ["scrappy"]
|
14
13
|
s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "extractors/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/learning/optimizer.rb", "lib/scrappy/learning/trainer.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb"]
|
15
14
|
s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "extractors/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/learning/optimizer.rb", "lib/scrappy/learning/trainer.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/annotator.js", "public/javascripts/remote.js", "public/javascripts/utils.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/extractors.haml", "views/help.haml", "views/home.haml", "views/layout.haml", "views/patterns.haml", "views/samples.haml", "views/test.haml", "scrappy.gemspec"]
|
16
|
-
s.homepage =
|
15
|
+
s.homepage = "http://github.com/josei/scrappy"
|
17
16
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Scrappy", "--main", "README.rdoc"]
|
18
17
|
s.require_paths = ["lib"]
|
19
|
-
s.rubyforge_project =
|
20
|
-
s.rubygems_version =
|
21
|
-
s.summary =
|
18
|
+
s.rubyforge_project = "scrappy"
|
19
|
+
s.rubygems_version = "1.8.10"
|
20
|
+
s.summary = "Web scraper that allows producing RDF data out of plain web pages"
|
22
21
|
s.test_files = ["test/test_scrappy.rb", "test/test_helper.rb"]
|
23
22
|
|
24
23
|
if s.respond_to? :specification_version then
|
25
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
26
24
|
s.specification_version = 3
|
27
25
|
|
28
|
-
if Gem::Version.new(Gem::
|
26
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
29
27
|
s.add_runtime_dependency(%q<activesupport>, [">= 2.3.5"])
|
30
28
|
s.add_runtime_dependency(%q<sinatra>, [">= 1.1.2"])
|
31
29
|
s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])
|
metadata
CHANGED
@@ -1,169 +1,132 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrappy
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 4
|
8
|
-
- 5
|
9
|
-
version: 0.4.5
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.6
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Jose Ignacio
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-10-05 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: activesupport
|
22
|
-
|
23
|
-
|
24
|
-
requirements:
|
25
|
-
- -
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
segments:
|
28
|
-
- 2
|
29
|
-
- 3
|
30
|
-
- 5
|
16
|
+
requirement: &77861890 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
31
21
|
version: 2.3.5
|
32
22
|
type: :runtime
|
33
|
-
version_requirements: *id001
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
name: sinatra
|
36
23
|
prerelease: false
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
24
|
+
version_requirements: *77861890
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: sinatra
|
27
|
+
requirement: &77861670 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
45
32
|
version: 1.1.2
|
46
33
|
type: :runtime
|
47
|
-
version_requirements: *id002
|
48
|
-
- !ruby/object:Gem::Dependency
|
49
|
-
name: thin
|
50
34
|
prerelease: false
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
35
|
+
version_requirements: *77861670
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: thin
|
38
|
+
requirement: &77861460 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
59
43
|
version: 1.2.7
|
60
44
|
type: :runtime
|
61
|
-
version_requirements: *id003
|
62
|
-
- !ruby/object:Gem::Dependency
|
63
|
-
name: nokogiri
|
64
45
|
prerelease: false
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
46
|
+
version_requirements: *77861460
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: nokogiri
|
49
|
+
requirement: &77861250 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
73
54
|
version: 1.4.1
|
74
55
|
type: :runtime
|
75
|
-
version_requirements: *id004
|
76
|
-
- !ruby/object:Gem::Dependency
|
77
|
-
name: mechanize
|
78
56
|
prerelease: false
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
57
|
+
version_requirements: *77861250
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: mechanize
|
60
|
+
requirement: &77861020 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
87
65
|
version: 1.0.0
|
88
66
|
type: :runtime
|
89
|
-
version_requirements: *id005
|
90
|
-
- !ruby/object:Gem::Dependency
|
91
|
-
name: lightrdf
|
92
67
|
prerelease: false
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
68
|
+
version_requirements: *77861020
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: lightrdf
|
71
|
+
requirement: &77860800 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
101
76
|
version: 0.3.9
|
102
77
|
type: :runtime
|
103
|
-
version_requirements: *id006
|
104
|
-
- !ruby/object:Gem::Dependency
|
105
|
-
name: i18n
|
106
78
|
prerelease: false
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
79
|
+
version_requirements: *77860800
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: i18n
|
82
|
+
requirement: &77860560 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
115
87
|
version: 0.4.2
|
116
88
|
type: :runtime
|
117
|
-
version_requirements: *id007
|
118
|
-
- !ruby/object:Gem::Dependency
|
119
|
-
name: rest-client
|
120
89
|
prerelease: false
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
90
|
+
version_requirements: *77860560
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: rest-client
|
93
|
+
requirement: &77860340 !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
129
98
|
version: 1.6.1
|
130
99
|
type: :runtime
|
131
|
-
version_requirements: *id008
|
132
|
-
- !ruby/object:Gem::Dependency
|
133
|
-
name: haml
|
134
100
|
prerelease: false
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
101
|
+
version_requirements: *77860340
|
102
|
+
- !ruby/object:Gem::Dependency
|
103
|
+
name: haml
|
104
|
+
requirement: &77860130 !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
143
109
|
version: 3.0.24
|
144
110
|
type: :runtime
|
145
|
-
version_requirements: *id009
|
146
|
-
- !ruby/object:Gem::Dependency
|
147
|
-
name: rack-flash
|
148
111
|
prerelease: false
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
112
|
+
version_requirements: *77860130
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: rack-flash
|
115
|
+
requirement: &77859920 !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ! '>='
|
119
|
+
- !ruby/object:Gem::Version
|
157
120
|
version: 0.1.1
|
158
121
|
type: :runtime
|
159
|
-
|
122
|
+
prerelease: false
|
123
|
+
version_requirements: *77859920
|
160
124
|
description: RDF web scraper
|
161
125
|
email: joseignacio.fernandez@gmail.com
|
162
|
-
executables:
|
126
|
+
executables:
|
163
127
|
- scrappy
|
164
128
|
extensions: []
|
165
|
-
|
166
|
-
extra_rdoc_files:
|
129
|
+
extra_rdoc_files:
|
167
130
|
- README.rdoc
|
168
131
|
- bin/scrappy
|
169
132
|
- extractors/elmundo.yarf
|
@@ -195,7 +158,7 @@ extra_rdoc_files:
|
|
195
158
|
- lib/scrappy/server/helpers.rb
|
196
159
|
- lib/scrappy/server/server.rb
|
197
160
|
- lib/scrappy/support.rb
|
198
|
-
files:
|
161
|
+
files:
|
199
162
|
- History.txt
|
200
163
|
- Manifest
|
201
164
|
- README.rdoc
|
@@ -247,42 +210,36 @@ files:
|
|
247
210
|
- views/samples.haml
|
248
211
|
- views/test.haml
|
249
212
|
- scrappy.gemspec
|
250
|
-
has_rdoc: true
|
251
213
|
homepage: http://github.com/josei/scrappy
|
252
214
|
licenses: []
|
253
|
-
|
254
215
|
post_install_message:
|
255
|
-
rdoc_options:
|
216
|
+
rdoc_options:
|
256
217
|
- --line-numbers
|
257
218
|
- --inline-source
|
258
219
|
- --title
|
259
220
|
- Scrappy
|
260
221
|
- --main
|
261
222
|
- README.rdoc
|
262
|
-
require_paths:
|
223
|
+
require_paths:
|
263
224
|
- lib
|
264
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
requirements:
|
273
|
-
- -
|
274
|
-
- !ruby/object:Gem::Version
|
275
|
-
|
276
|
-
- 1
|
277
|
-
- 2
|
278
|
-
version: "1.2"
|
225
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
226
|
+
none: false
|
227
|
+
requirements:
|
228
|
+
- - ! '>='
|
229
|
+
- !ruby/object:Gem::Version
|
230
|
+
version: '0'
|
231
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
232
|
+
none: false
|
233
|
+
requirements:
|
234
|
+
- - ! '>='
|
235
|
+
- !ruby/object:Gem::Version
|
236
|
+
version: '1.2'
|
279
237
|
requirements: []
|
280
|
-
|
281
238
|
rubyforge_project: scrappy
|
282
|
-
rubygems_version: 1.
|
239
|
+
rubygems_version: 1.8.10
|
283
240
|
signing_key:
|
284
241
|
specification_version: 3
|
285
242
|
summary: Web scraper that allows producing RDF data out of plain web pages
|
286
|
-
test_files:
|
243
|
+
test_files:
|
287
244
|
- test/test_scrappy.rb
|
288
245
|
- test/test_helper.rb
|