sunscraper 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/.rspec +2 -0
- data/.yardopts +1 -0
- data/Gemfile +4 -0
- data/LICENSE +19 -0
- data/README.md +68 -0
- data/Rakefile +1 -0
- data/ext/.gitignore +4 -0
- data/ext/Makefile +270 -0
- data/ext/extconf.rb +9 -0
- data/ext/sunscraper.cpp +86 -0
- data/ext/sunscraper.h +45 -0
- data/ext/sunscraper.pro +15 -0
- data/ext/sunscraperexternal.cpp +33 -0
- data/ext/sunscraperlibrary.cpp +25 -0
- data/ext/sunscraperlibrary.h +22 -0
- data/ext/sunscraperproxy.cpp +13 -0
- data/ext/sunscraperproxy.h +24 -0
- data/ext/sunscraperthread.cpp +67 -0
- data/ext/sunscraperthread.h +34 -0
- data/lib/sunscraper.rb +50 -0
- data/lib/sunscraper/library.rb +37 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/sunscraper_spec.rb +45 -0
- data/sunscraper.gemspec +23 -0
- metadata +120 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--no-private --markup markdown - LICENSE
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (C) 2012 Peter Zotov <whitequark@whitequark.org>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
4
|
+
this software and associated documentation files (the "Software"), to deal in
|
5
|
+
the Software without restriction, including without limitation the rights to
|
6
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
7
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
8
|
+
so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
Sunscraper
|
2
|
+
==========
|
3
|
+
|
4
|
+
Sunscraper is a gem for prerendering pages with hashbang URLs like `http://whatever.com/#!/page`.
|
5
|
+
|
6
|
+
It works by loading content in an embedded web browser and waiting for the JavaScript method `Sunscraper.finish()` to be
|
7
|
+
called.
|
8
|
+
|
9
|
+
HTML = %{
|
10
|
+
<html>
|
11
|
+
<head>
|
12
|
+
<script type="text/javascript">
|
13
|
+
document.addEventListener("DOMContentLoaded", function() {
|
14
|
+
document.getElementById('fuga').textContent =
|
15
|
+
("!skrow tI").split("").reverse().join("");
|
16
|
+
Sunscraper.finish();
|
17
|
+
}, true);
|
18
|
+
</script>
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
<div id='fuga'></div>
|
22
|
+
</body>
|
23
|
+
</html>
|
24
|
+
}
|
25
|
+
|
26
|
+
Sunscraper.scrape_html(HTML).include?('It works!') # => true
|
27
|
+
|
28
|
+
See also [documentation][].
|
29
|
+
|
30
|
+
[documentation]: http://rdoc.info/gems/sunscraper/Sunscraper
|
31
|
+
|
32
|
+
Installation
|
33
|
+
------------
|
34
|
+
|
35
|
+
Sunscraper requires the Qt 4.x and QtWebKit packages to be installed on the target system. *Sunscraper is not a Ruby
|
36
|
+
C extension*; it works by building a Qt shared library and loading it through [FFI][].
|
37
|
+
|
38
|
+
[FFI]: http://en.wikipedia.org/wiki/Foreign_Function_Interface
|
39
|
+
|
40
|
+
gem install sunscraper
|
41
|
+
|
42
|
+
Runtime requirements
|
43
|
+
--------------------
|
44
|
+
|
45
|
+
On Linux with Qt versions <= 4.8, Sunscraper requires a running X server and a valid `DISPLAY` environment
|
46
|
+
variable. Consider using [Xvfb][] on a GUI-less production server.
|
47
|
+
|
48
|
+
[Xvfb]: http://www.x.org/releases/X11R7.6/doc/man/man1/Xvfb.1.xhtml
|
49
|
+
|
50
|
+
Compatibility
|
51
|
+
-------------
|
52
|
+
|
53
|
+
Sunscraper should be compatible with all major Ruby implementations on all major operating systems, including
|
54
|
+
Ruby MRI 1.9, JRuby, Rubinius and MacRuby running on GNU/Linux, OS X and Windows.
|
55
|
+
|
56
|
+
JRuby versions up to 1.6.5 are known not to work due to a bug in its FFI library.
|
57
|
+
|
58
|
+
Ruby MRI 1.8 is not supported because it has a braindead threading model, and it never will be, because I don't care.
|
59
|
+
|
60
|
+
Thread safety
|
61
|
+
-------------
|
62
|
+
|
63
|
+
Sunscraper is thread-safe.
|
64
|
+
|
65
|
+
License
|
66
|
+
-------
|
67
|
+
|
68
|
+
Sunscraper is distributed under the terms of the MIT license; see LICENSE in the source distribution.
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/ext/.gitignore
ADDED
data/ext/Makefile
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
#############################################################################
|
2
|
+
# Makefile for building: libsunscraper.so.1.0.0
|
3
|
+
# Generated by qmake (2.01a) (Qt 4.7.4) on: Sat Feb 18 05:54:02 2012
|
4
|
+
# Project: sunscraper.pro
|
5
|
+
# Template: lib
|
6
|
+
# Command: /usr/bin/qmake-qt4 -spec /usr/share/qt4/mkspecs/linux-g++ CONFIG+=debug -o Makefile sunscraper.pro
|
7
|
+
#############################################################################
|
8
|
+
|
9
|
+
####### Compiler, tools and options
|
10
|
+
|
11
|
+
CC = gcc
|
12
|
+
CXX = g++
|
13
|
+
DEFINES = -DQT_WEBKIT -DQT_WEBKIT_LIB -DQT_GUI_LIB -DQT_CORE_LIB -DQT_SHARED
|
14
|
+
CFLAGS = -pipe -g -Wall -W -D_REENTRANT -fPIC $(DEFINES)
|
15
|
+
CXXFLAGS = -pipe -g -Wall -W -D_REENTRANT -fPIC $(DEFINES)
|
16
|
+
INCPATH = -I/usr/share/qt4/mkspecs/linux-g++ -I. -I/usr/include/qt4/QtCore -I/usr/include/qt4/QtGui -I/usr/include/qt4/QtWebKit -I/usr/include/qt4 -I.
|
17
|
+
LINK = g++
|
18
|
+
LFLAGS = -shared -Wl,-soname,libsunscraper.so.1
|
19
|
+
LIBS = $(SUBLIBS) -L/usr/lib -lQtWebKit -lQtGui -lQtCore -lpthread
|
20
|
+
AR = ar cqs
|
21
|
+
RANLIB =
|
22
|
+
QMAKE = /usr/bin/qmake-qt4
|
23
|
+
TAR = tar -cf
|
24
|
+
COMPRESS = gzip -9f
|
25
|
+
COPY = cp -f
|
26
|
+
SED = sed
|
27
|
+
COPY_FILE = $(COPY)
|
28
|
+
COPY_DIR = $(COPY) -r
|
29
|
+
STRIP = strip
|
30
|
+
INSTALL_FILE = install -m 644 -p
|
31
|
+
INSTALL_DIR = $(COPY_DIR)
|
32
|
+
INSTALL_PROGRAM = install -m 755 -p
|
33
|
+
DEL_FILE = rm -f
|
34
|
+
SYMLINK = ln -f -s
|
35
|
+
DEL_DIR = rmdir
|
36
|
+
MOVE = mv -f
|
37
|
+
CHK_DIR_EXISTS= test -d
|
38
|
+
MKDIR = mkdir -p
|
39
|
+
|
40
|
+
####### Output directory
|
41
|
+
|
42
|
+
OBJECTS_DIR = ./
|
43
|
+
|
44
|
+
####### Files
|
45
|
+
|
46
|
+
SOURCES = sunscraperlibrary.cpp \
|
47
|
+
sunscraperthread.cpp \
|
48
|
+
sunscraperexternal.cpp \
|
49
|
+
sunscraper.cpp \
|
50
|
+
sunscraperproxy.cpp moc_sunscraperthread.cpp \
|
51
|
+
moc_sunscraper.cpp \
|
52
|
+
moc_sunscraperproxy.cpp
|
53
|
+
OBJECTS = sunscraperlibrary.o \
|
54
|
+
sunscraperthread.o \
|
55
|
+
sunscraperexternal.o \
|
56
|
+
sunscraper.o \
|
57
|
+
sunscraperproxy.o \
|
58
|
+
moc_sunscraperthread.o \
|
59
|
+
moc_sunscraper.o \
|
60
|
+
moc_sunscraperproxy.o
|
61
|
+
DIST = /usr/share/qt4/mkspecs/common/g++.conf \
|
62
|
+
/usr/share/qt4/mkspecs/common/unix.conf \
|
63
|
+
/usr/share/qt4/mkspecs/common/linux.conf \
|
64
|
+
/usr/share/qt4/mkspecs/qconfig.pri \
|
65
|
+
/usr/share/qt4/mkspecs/modules/qt_webkit_version.pri \
|
66
|
+
/usr/share/qt4/mkspecs/features/qt_functions.prf \
|
67
|
+
/usr/share/qt4/mkspecs/features/qt_config.prf \
|
68
|
+
/usr/share/qt4/mkspecs/features/exclusive_builds.prf \
|
69
|
+
/usr/share/qt4/mkspecs/features/default_pre.prf \
|
70
|
+
/usr/share/qt4/mkspecs/features/debug.prf \
|
71
|
+
/usr/share/qt4/mkspecs/features/default_post.prf \
|
72
|
+
/usr/share/qt4/mkspecs/features/warn_on.prf \
|
73
|
+
/usr/share/qt4/mkspecs/features/qt.prf \
|
74
|
+
/usr/share/qt4/mkspecs/features/unix/thread.prf \
|
75
|
+
/usr/share/qt4/mkspecs/features/moc.prf \
|
76
|
+
/usr/share/qt4/mkspecs/features/resources.prf \
|
77
|
+
/usr/share/qt4/mkspecs/features/uic.prf \
|
78
|
+
/usr/share/qt4/mkspecs/features/yacc.prf \
|
79
|
+
/usr/share/qt4/mkspecs/features/lex.prf \
|
80
|
+
/usr/share/qt4/mkspecs/features/include_source_dir.prf \
|
81
|
+
sunscraper.pro
|
82
|
+
QMAKE_TARGET = sunscraper
|
83
|
+
DESTDIR =
|
84
|
+
TARGET = libsunscraper.so.1.0.0
|
85
|
+
TARGETA = libsunscraper.a
|
86
|
+
TARGETD = libsunscraper.so.1.0.0
|
87
|
+
TARGET0 = libsunscraper.so
|
88
|
+
TARGET1 = libsunscraper.so.1
|
89
|
+
TARGET2 = libsunscraper.so.1.0
|
90
|
+
|
91
|
+
first: all
|
92
|
+
####### Implicit rules
|
93
|
+
|
94
|
+
.SUFFIXES: .o .c .cpp .cc .cxx .C
|
95
|
+
|
96
|
+
.cpp.o:
|
97
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
|
98
|
+
|
99
|
+
.cc.o:
|
100
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
|
101
|
+
|
102
|
+
.cxx.o:
|
103
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
|
104
|
+
|
105
|
+
.C.o:
|
106
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
|
107
|
+
|
108
|
+
.c.o:
|
109
|
+
$(CC) -c $(CFLAGS) $(INCPATH) -o "$@" "$<"
|
110
|
+
|
111
|
+
####### Build rules
|
112
|
+
|
113
|
+
all: Makefile $(TARGET)
|
114
|
+
|
115
|
+
$(TARGET): $(OBJECTS) $(SUBLIBS) $(OBJCOMP)
|
116
|
+
-$(DEL_FILE) $(TARGET) $(TARGET0) $(TARGET1) $(TARGET2)
|
117
|
+
$(LINK) $(LFLAGS) -o $(TARGET) $(OBJECTS) $(LIBS) $(OBJCOMP)
|
118
|
+
-ln -s $(TARGET) $(TARGET0)
|
119
|
+
-ln -s $(TARGET) $(TARGET1)
|
120
|
+
-ln -s $(TARGET) $(TARGET2)
|
121
|
+
|
122
|
+
|
123
|
+
|
124
|
+
staticlib: $(TARGETA)
|
125
|
+
|
126
|
+
$(TARGETA): $(OBJECTS) $(OBJCOMP)
|
127
|
+
-$(DEL_FILE) $(TARGETA)
|
128
|
+
$(AR) $(TARGETA) $(OBJECTS)
|
129
|
+
|
130
|
+
Makefile: sunscraper.pro /usr/share/qt4/mkspecs/linux-g++/qmake.conf /usr/share/qt4/mkspecs/common/g++.conf \
|
131
|
+
/usr/share/qt4/mkspecs/common/unix.conf \
|
132
|
+
/usr/share/qt4/mkspecs/common/linux.conf \
|
133
|
+
/usr/share/qt4/mkspecs/qconfig.pri \
|
134
|
+
/usr/share/qt4/mkspecs/modules/qt_webkit_version.pri \
|
135
|
+
/usr/share/qt4/mkspecs/features/qt_functions.prf \
|
136
|
+
/usr/share/qt4/mkspecs/features/qt_config.prf \
|
137
|
+
/usr/share/qt4/mkspecs/features/exclusive_builds.prf \
|
138
|
+
/usr/share/qt4/mkspecs/features/default_pre.prf \
|
139
|
+
/usr/share/qt4/mkspecs/features/debug.prf \
|
140
|
+
/usr/share/qt4/mkspecs/features/default_post.prf \
|
141
|
+
/usr/share/qt4/mkspecs/features/warn_on.prf \
|
142
|
+
/usr/share/qt4/mkspecs/features/qt.prf \
|
143
|
+
/usr/share/qt4/mkspecs/features/unix/thread.prf \
|
144
|
+
/usr/share/qt4/mkspecs/features/moc.prf \
|
145
|
+
/usr/share/qt4/mkspecs/features/resources.prf \
|
146
|
+
/usr/share/qt4/mkspecs/features/uic.prf \
|
147
|
+
/usr/share/qt4/mkspecs/features/yacc.prf \
|
148
|
+
/usr/share/qt4/mkspecs/features/lex.prf \
|
149
|
+
/usr/share/qt4/mkspecs/features/include_source_dir.prf \
|
150
|
+
/usr/lib/libQtWebKit.prl \
|
151
|
+
/usr/lib/libQtGui.prl \
|
152
|
+
/usr/lib/libQtCore.prl
|
153
|
+
$(QMAKE) -spec /usr/share/qt4/mkspecs/linux-g++ CONFIG+=debug -o Makefile sunscraper.pro
|
154
|
+
/usr/share/qt4/mkspecs/common/g++.conf:
|
155
|
+
/usr/share/qt4/mkspecs/common/unix.conf:
|
156
|
+
/usr/share/qt4/mkspecs/common/linux.conf:
|
157
|
+
/usr/share/qt4/mkspecs/qconfig.pri:
|
158
|
+
/usr/share/qt4/mkspecs/modules/qt_webkit_version.pri:
|
159
|
+
/usr/share/qt4/mkspecs/features/qt_functions.prf:
|
160
|
+
/usr/share/qt4/mkspecs/features/qt_config.prf:
|
161
|
+
/usr/share/qt4/mkspecs/features/exclusive_builds.prf:
|
162
|
+
/usr/share/qt4/mkspecs/features/default_pre.prf:
|
163
|
+
/usr/share/qt4/mkspecs/features/debug.prf:
|
164
|
+
/usr/share/qt4/mkspecs/features/default_post.prf:
|
165
|
+
/usr/share/qt4/mkspecs/features/warn_on.prf:
|
166
|
+
/usr/share/qt4/mkspecs/features/qt.prf:
|
167
|
+
/usr/share/qt4/mkspecs/features/unix/thread.prf:
|
168
|
+
/usr/share/qt4/mkspecs/features/moc.prf:
|
169
|
+
/usr/share/qt4/mkspecs/features/resources.prf:
|
170
|
+
/usr/share/qt4/mkspecs/features/uic.prf:
|
171
|
+
/usr/share/qt4/mkspecs/features/yacc.prf:
|
172
|
+
/usr/share/qt4/mkspecs/features/lex.prf:
|
173
|
+
/usr/share/qt4/mkspecs/features/include_source_dir.prf:
|
174
|
+
/usr/lib/libQtWebKit.prl:
|
175
|
+
/usr/lib/libQtGui.prl:
|
176
|
+
/usr/lib/libQtCore.prl:
|
177
|
+
qmake: FORCE
|
178
|
+
@$(QMAKE) -spec /usr/share/qt4/mkspecs/linux-g++ CONFIG+=debug -o Makefile sunscraper.pro
|
179
|
+
|
180
|
+
dist:
|
181
|
+
@$(CHK_DIR_EXISTS) .tmp/sunscraper1.0.0 || $(MKDIR) .tmp/sunscraper1.0.0
|
182
|
+
$(COPY_FILE) --parents $(SOURCES) $(DIST) .tmp/sunscraper1.0.0/ && $(COPY_FILE) --parents sunscraperlibrary.h sunscraperthread.h sunscraper.h sunscraperproxy.h .tmp/sunscraper1.0.0/ && $(COPY_FILE) --parents sunscraperlibrary.cpp sunscraperthread.cpp sunscraperexternal.cpp sunscraper.cpp sunscraperproxy.cpp .tmp/sunscraper1.0.0/ && (cd `dirname .tmp/sunscraper1.0.0` && $(TAR) sunscraper1.0.0.tar sunscraper1.0.0 && $(COMPRESS) sunscraper1.0.0.tar) && $(MOVE) `dirname .tmp/sunscraper1.0.0`/sunscraper1.0.0.tar.gz . && $(DEL_FILE) -r .tmp/sunscraper1.0.0
|
183
|
+
|
184
|
+
|
185
|
+
clean:compiler_clean
|
186
|
+
-$(DEL_FILE) $(OBJECTS)
|
187
|
+
-$(DEL_FILE) *~ core *.core
|
188
|
+
|
189
|
+
|
190
|
+
####### Sub-libraries
|
191
|
+
|
192
|
+
distclean: clean
|
193
|
+
-$(DEL_FILE) $(TARGET)
|
194
|
+
-$(DEL_FILE) $(TARGET0) $(TARGET1) $(TARGET2) $(TARGETA)
|
195
|
+
-$(DEL_FILE) Makefile
|
196
|
+
|
197
|
+
|
198
|
+
check: first
|
199
|
+
|
200
|
+
mocclean: compiler_moc_header_clean compiler_moc_source_clean
|
201
|
+
|
202
|
+
mocables: compiler_moc_header_make_all compiler_moc_source_make_all
|
203
|
+
|
204
|
+
compiler_moc_header_make_all: moc_sunscraperthread.cpp moc_sunscraper.cpp moc_sunscraperproxy.cpp
|
205
|
+
compiler_moc_header_clean:
|
206
|
+
-$(DEL_FILE) moc_sunscraperthread.cpp moc_sunscraper.cpp moc_sunscraperproxy.cpp
|
207
|
+
moc_sunscraperthread.cpp: sunscraperthread.h
|
208
|
+
/usr/bin/moc-qt4 $(DEFINES) $(INCPATH) sunscraperthread.h -o moc_sunscraperthread.cpp
|
209
|
+
|
210
|
+
moc_sunscraper.cpp: sunscraper.h
|
211
|
+
/usr/bin/moc-qt4 $(DEFINES) $(INCPATH) sunscraper.h -o moc_sunscraper.cpp
|
212
|
+
|
213
|
+
moc_sunscraperproxy.cpp: sunscraperproxy.h
|
214
|
+
/usr/bin/moc-qt4 $(DEFINES) $(INCPATH) sunscraperproxy.h -o moc_sunscraperproxy.cpp
|
215
|
+
|
216
|
+
compiler_rcc_make_all:
|
217
|
+
compiler_rcc_clean:
|
218
|
+
compiler_image_collection_make_all: qmake_image_collection.cpp
|
219
|
+
compiler_image_collection_clean:
|
220
|
+
-$(DEL_FILE) qmake_image_collection.cpp
|
221
|
+
compiler_moc_source_make_all:
|
222
|
+
compiler_moc_source_clean:
|
223
|
+
compiler_uic_make_all:
|
224
|
+
compiler_uic_clean:
|
225
|
+
compiler_yacc_decl_make_all:
|
226
|
+
compiler_yacc_decl_clean:
|
227
|
+
compiler_yacc_impl_make_all:
|
228
|
+
compiler_yacc_impl_clean:
|
229
|
+
compiler_lex_make_all:
|
230
|
+
compiler_lex_clean:
|
231
|
+
compiler_clean: compiler_moc_header_clean
|
232
|
+
|
233
|
+
####### Compile
|
234
|
+
|
235
|
+
sunscraperlibrary.o: sunscraperlibrary.cpp sunscraperlibrary.h \
|
236
|
+
sunscraperthread.h
|
237
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraperlibrary.o sunscraperlibrary.cpp
|
238
|
+
|
239
|
+
sunscraperthread.o: sunscraperthread.cpp sunscraperthread.h \
|
240
|
+
sunscraperproxy.h
|
241
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraperthread.o sunscraperthread.cpp
|
242
|
+
|
243
|
+
sunscraperexternal.o: sunscraperexternal.cpp sunscraper.h
|
244
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraperexternal.o sunscraperexternal.cpp
|
245
|
+
|
246
|
+
sunscraper.o: sunscraper.cpp sunscraper.h \
|
247
|
+
sunscraperlibrary.h \
|
248
|
+
sunscraperthread.h
|
249
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraper.o sunscraper.cpp
|
250
|
+
|
251
|
+
sunscraperproxy.o: sunscraperproxy.cpp sunscraperproxy.h
|
252
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraperproxy.o sunscraperproxy.cpp
|
253
|
+
|
254
|
+
moc_sunscraperthread.o: moc_sunscraperthread.cpp
|
255
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o moc_sunscraperthread.o moc_sunscraperthread.cpp
|
256
|
+
|
257
|
+
moc_sunscraper.o: moc_sunscraper.cpp
|
258
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o moc_sunscraper.o moc_sunscraper.cpp
|
259
|
+
|
260
|
+
moc_sunscraperproxy.o: moc_sunscraperproxy.cpp
|
261
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o moc_sunscraperproxy.o moc_sunscraperproxy.cpp
|
262
|
+
|
263
|
+
####### Install
|
264
|
+
|
265
|
+
install: FORCE
|
266
|
+
|
267
|
+
uninstall: FORCE
|
268
|
+
|
269
|
+
FORCE:
|
270
|
+
|
data/ext/extconf.rb
ADDED
data/ext/sunscraper.cpp
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QThread>
|
3
|
+
#include <QTimer>
|
4
|
+
#include <QWebPage>
|
5
|
+
#include <QWebFrame>
|
6
|
+
#include <QMutexLocker>
|
7
|
+
#include <QtDebug>
|
8
|
+
#include "sunscraper.h"
|
9
|
+
#include "sunscraperlibrary.h"
|
10
|
+
#include "sunscraperthread.h"
|
11
|
+
|
12
|
+
unsigned Sunscraper::_nextQueryId = 1;
|
13
|
+
QMutex Sunscraper::_staticMutex;
|
14
|
+
|
15
|
+
Sunscraper::Sunscraper()
|
16
|
+
{
|
17
|
+
QMutexLocker locker(&_staticMutex);
|
18
|
+
|
19
|
+
_queryId = _nextQueryId++;
|
20
|
+
|
21
|
+
SunscraperThread *worker = SunscraperLibrary::instance()->thread();
|
22
|
+
|
23
|
+
connect(this, SIGNAL(requestLoadHtml(uint,QString)),
|
24
|
+
worker, SLOT(loadHtml(uint,QString)), Qt::QueuedConnection);
|
25
|
+
connect(this, SIGNAL(requestLoadUrl(uint,QString)),
|
26
|
+
worker, SLOT(loadUrl(uint,QString)), Qt::QueuedConnection);
|
27
|
+
connect(this, SIGNAL(requestFinalize(uint)),
|
28
|
+
worker, SLOT(finalize(uint)), Qt::QueuedConnection);
|
29
|
+
|
30
|
+
connect(worker, SIGNAL(finished(uint,QString)),
|
31
|
+
this, SLOT(finished(uint,QString)), Qt::QueuedConnection);
|
32
|
+
}
|
33
|
+
|
34
|
+
void Sunscraper::loadHtml(QString html)
|
35
|
+
{
|
36
|
+
emit requestLoadHtml(_queryId, html);
|
37
|
+
}
|
38
|
+
|
39
|
+
void Sunscraper::loadUrl(QString url)
|
40
|
+
{
|
41
|
+
emit requestLoadUrl(_queryId, url);
|
42
|
+
}
|
43
|
+
|
44
|
+
void Sunscraper::wait(unsigned timeout)
|
45
|
+
{
|
46
|
+
QTimer _timeoutTimer;
|
47
|
+
connect(&_timeoutTimer, SIGNAL(timeout()), this, SLOT(timeout()));
|
48
|
+
|
49
|
+
_timeoutTimer.setInterval(timeout);
|
50
|
+
_timeoutTimer.start();
|
51
|
+
|
52
|
+
_eventLoop.exec();
|
53
|
+
|
54
|
+
_timeoutTimer.stop();
|
55
|
+
}
|
56
|
+
|
57
|
+
void Sunscraper::finished(unsigned eventQueryId, QString html)
|
58
|
+
{
|
59
|
+
if(eventQueryId != _queryId)
|
60
|
+
return;
|
61
|
+
|
62
|
+
_eventLoop.quit();
|
63
|
+
|
64
|
+
_html = html.toUtf8();
|
65
|
+
|
66
|
+
emit requestFinalize(_queryId);
|
67
|
+
}
|
68
|
+
|
69
|
+
void Sunscraper::timeout()
|
70
|
+
{
|
71
|
+
_eventLoop.quit();
|
72
|
+
|
73
|
+
_html = "!SUNSCRAPER_TIMEOUT";
|
74
|
+
|
75
|
+
emit requestFinalize(_queryId);
|
76
|
+
}
|
77
|
+
|
78
|
+
QByteArray Sunscraper::fetch()
|
79
|
+
{
|
80
|
+
return _html;
|
81
|
+
}
|
82
|
+
|
83
|
+
const char *Sunscraper::fetchAsCString()
|
84
|
+
{
|
85
|
+
return _html.constData();
|
86
|
+
}
|
data/ext/sunscraper.h
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#ifndef SUNSCRAPER_H
|
2
|
+
#define SUNSCRAPER_H
|
3
|
+
|
4
|
+
#include <QObject>
|
5
|
+
#include <QString>
|
6
|
+
#include <QMutex>
|
7
|
+
#include <QByteArray>
|
8
|
+
#include <QEventLoop>
|
9
|
+
|
10
|
+
class QWebPage;
|
11
|
+
|
12
|
+
class Sunscraper : public QObject
|
13
|
+
{
|
14
|
+
Q_OBJECT
|
15
|
+
|
16
|
+
public:
|
17
|
+
Sunscraper();
|
18
|
+
|
19
|
+
void loadHtml(QString html);
|
20
|
+
void loadUrl(QString url);
|
21
|
+
|
22
|
+
void wait(unsigned timeout);
|
23
|
+
|
24
|
+
QByteArray fetch();
|
25
|
+
const char *fetchAsCString();
|
26
|
+
|
27
|
+
private slots:
|
28
|
+
void finished(unsigned queryId, QString html);
|
29
|
+
void timeout();
|
30
|
+
|
31
|
+
signals:
|
32
|
+
void requestLoadHtml(unsigned queryId, QString html);
|
33
|
+
void requestLoadUrl(unsigned queryId, QString html);
|
34
|
+
void requestFinalize(unsigned queryId);
|
35
|
+
|
36
|
+
private:
|
37
|
+
static unsigned _nextQueryId;
|
38
|
+
static QMutex _staticMutex;
|
39
|
+
|
40
|
+
unsigned _queryId;
|
41
|
+
QEventLoop _eventLoop;
|
42
|
+
QByteArray _html;
|
43
|
+
};
|
44
|
+
|
45
|
+
#endif // SUNSCRAPER_H
|
data/ext/sunscraper.pro
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
QT += webkit
|
2
|
+
|
3
|
+
TARGET = sunscraper
|
4
|
+
TEMPLATE = lib
|
5
|
+
|
6
|
+
SOURCES += sunscraperlibrary.cpp \
|
7
|
+
sunscraperthread.cpp \
|
8
|
+
sunscraperexternal.cpp \
|
9
|
+
sunscraper.cpp \
|
10
|
+
sunscraperproxy.cpp
|
11
|
+
|
12
|
+
HEADERS += sunscraperlibrary.h \
|
13
|
+
sunscraperthread.h \
|
14
|
+
sunscraper.h \
|
15
|
+
sunscraperproxy.h
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#include "sunscraper.h"
|
2
|
+
|
3
|
+
extern "C" {
|
4
|
+
Sunscraper *sunscraper_create()
|
5
|
+
{
|
6
|
+
return new Sunscraper();
|
7
|
+
}
|
8
|
+
|
9
|
+
void sunscraper_load_html(Sunscraper *sunscraper, const char *html)
|
10
|
+
{
|
11
|
+
sunscraper->loadHtml(html);
|
12
|
+
}
|
13
|
+
|
14
|
+
void sunscraper_load_url(Sunscraper *sunscraper, const char *url)
|
15
|
+
{
|
16
|
+
sunscraper->loadUrl(url);
|
17
|
+
}
|
18
|
+
|
19
|
+
void sunscraper_wait(Sunscraper *sunscraper, unsigned timeout)
|
20
|
+
{
|
21
|
+
sunscraper->wait(timeout);
|
22
|
+
}
|
23
|
+
|
24
|
+
const char *sunscraper_fetch(Sunscraper *sunscraper)
|
25
|
+
{
|
26
|
+
return sunscraper->fetchAsCString();
|
27
|
+
}
|
28
|
+
|
29
|
+
void sunscraper_discard(Sunscraper *sunscraper)
|
30
|
+
{
|
31
|
+
delete sunscraper;
|
32
|
+
}
|
33
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#include "sunscraperlibrary.h"
|
2
|
+
#include "sunscraperthread.h"
|
3
|
+
|
4
|
+
SunscraperLibrary SunscraperLibrary::_instance;
|
5
|
+
|
6
|
+
SunscraperLibrary::SunscraperLibrary()
|
7
|
+
{
|
8
|
+
_apartmentThread = new SunscraperThread();
|
9
|
+
_apartmentThread->start();
|
10
|
+
}
|
11
|
+
|
12
|
+
SunscraperLibrary::~SunscraperLibrary()
|
13
|
+
{
|
14
|
+
/* Do nothing. This is on purpose. */
|
15
|
+
}
|
16
|
+
|
17
|
+
SunscraperLibrary *SunscraperLibrary::instance()
|
18
|
+
{
|
19
|
+
return &_instance;
|
20
|
+
}
|
21
|
+
|
22
|
+
SunscraperThread *SunscraperLibrary::thread()
|
23
|
+
{
|
24
|
+
return _apartmentThread;
|
25
|
+
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#ifndef SUNSCRAPERLIBRARY_H
|
2
|
+
#define SUNSCRAPERLIBRARY_H
|
3
|
+
|
4
|
+
class SunscraperThread;
|
5
|
+
|
6
|
+
class SunscraperLibrary {
|
7
|
+
public:
|
8
|
+
static SunscraperLibrary *instance();
|
9
|
+
|
10
|
+
SunscraperThread *thread();
|
11
|
+
|
12
|
+
private:
|
13
|
+
SunscraperLibrary();
|
14
|
+
SunscraperLibrary(SunscraperLibrary &);
|
15
|
+
~SunscraperLibrary();
|
16
|
+
|
17
|
+
static SunscraperLibrary _instance;
|
18
|
+
|
19
|
+
SunscraperThread *_apartmentThread;
|
20
|
+
};
|
21
|
+
|
22
|
+
#endif // SUNSCRAPERLIBRARY_H
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#include <QWebPage>
|
2
|
+
#include <QWebFrame>
|
3
|
+
#include "sunscraperproxy.h"
|
4
|
+
|
5
|
+
SunscraperProxy::SunscraperProxy(QWebPage *parent, unsigned queryId) :
|
6
|
+
QObject(parent), _webPage(parent), _queryId(queryId)
|
7
|
+
{
|
8
|
+
}
|
9
|
+
|
10
|
+
void SunscraperProxy::finish()
|
11
|
+
{
|
12
|
+
emit finished(_queryId, _webPage->mainFrame()->toHtml());
|
13
|
+
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef SUNSCRAPERPROXY_H
|
2
|
+
#define SUNSCRAPERPROXY_H
|
3
|
+
|
4
|
+
#include <QObject>
|
5
|
+
|
6
|
+
class QWebPage;
|
7
|
+
|
8
|
+
class SunscraperProxy : public QObject
|
9
|
+
{
|
10
|
+
Q_OBJECT
|
11
|
+
public:
|
12
|
+
SunscraperProxy(QWebPage *parent, unsigned queryId);
|
13
|
+
|
14
|
+
Q_INVOKABLE void finish();
|
15
|
+
|
16
|
+
signals:
|
17
|
+
void finished(unsigned _queryId, QString html);
|
18
|
+
|
19
|
+
private:
|
20
|
+
QWebPage *_webPage;
|
21
|
+
unsigned _queryId;
|
22
|
+
};
|
23
|
+
|
24
|
+
#endif // SUNSCRAPERPROXY_H
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QWebPage>
|
3
|
+
#include <QWebFrame>
|
4
|
+
#include "sunscraperthread.h"
|
5
|
+
#include "sunscraperproxy.h"
|
6
|
+
|
7
|
+
SunscraperThread::SunscraperThread()
|
8
|
+
{
|
9
|
+
}
|
10
|
+
|
11
|
+
void SunscraperThread::run()
|
12
|
+
{
|
13
|
+
static int argc;
|
14
|
+
static char **argv = {NULL};
|
15
|
+
|
16
|
+
QApplication app(argc, argv);
|
17
|
+
app.exec();
|
18
|
+
|
19
|
+
qFatal("Sunscraper apartment thread event loop should never end");
|
20
|
+
}
|
21
|
+
|
22
|
+
void SunscraperThread::loadHtml(unsigned queryId, QString html)
|
23
|
+
{
|
24
|
+
QWebPage *webPage = initializeWebPage(queryId);
|
25
|
+
webPage->mainFrame()->setHtml(html);
|
26
|
+
}
|
27
|
+
|
28
|
+
void SunscraperThread::loadUrl(unsigned queryId, QString url)
|
29
|
+
{
|
30
|
+
QWebPage *webPage = initializeWebPage(queryId);
|
31
|
+
webPage->mainFrame()->load(url);
|
32
|
+
}
|
33
|
+
|
34
|
+
void SunscraperThread::finalize(unsigned queryId)
|
35
|
+
{
|
36
|
+
Q_ASSERT(_webPages[queryId] != NULL);
|
37
|
+
|
38
|
+
_webPages[queryId]->deleteLater();
|
39
|
+
_webPages.remove(queryId);
|
40
|
+
}
|
41
|
+
|
42
|
+
QWebPage *SunscraperThread::initializeWebPage(unsigned queryId)
|
43
|
+
{
|
44
|
+
Q_ASSERT(_webPages[queryId] == NULL);
|
45
|
+
|
46
|
+
QWebPage *webPage = new QWebPage(this);
|
47
|
+
connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
|
48
|
+
this, SLOT(attachAPI()));
|
49
|
+
|
50
|
+
_webPages[queryId] = webPage;
|
51
|
+
|
52
|
+
return webPage;
|
53
|
+
}
|
54
|
+
|
55
|
+
void SunscraperThread::attachAPI()
|
56
|
+
{
|
57
|
+
QWebFrame *origin = static_cast<QWebFrame *>(QObject::sender());
|
58
|
+
QWebPage *page = origin->page();
|
59
|
+
|
60
|
+
unsigned queryId = _webPages.key(page, 0);
|
61
|
+
Q_ASSERT(queryId != 0);
|
62
|
+
|
63
|
+
SunscraperProxy *proxy = new SunscraperProxy(page, queryId);
|
64
|
+
connect(proxy, SIGNAL(finished(uint,QString)), this, SIGNAL(finished(uint,QString)));
|
65
|
+
|
66
|
+
origin->addToJavaScriptWindowObject("Sunscraper", proxy, QScriptEngine::QtOwnership);
|
67
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#ifndef SUNSCRAPERTHREAD_H
|
2
|
+
#define SUNSCRAPERTHREAD_H
|
3
|
+
|
4
|
+
#include <QThread>
|
5
|
+
#include <QMap>
|
6
|
+
|
7
|
+
class QWebPage;
|
8
|
+
|
9
|
+
class SunscraperThread : public QThread
|
10
|
+
{
|
11
|
+
Q_OBJECT
|
12
|
+
public:
|
13
|
+
SunscraperThread();
|
14
|
+
|
15
|
+
void run();
|
16
|
+
|
17
|
+
signals:
|
18
|
+
void finished(unsigned queryId, QString result);
|
19
|
+
|
20
|
+
public slots:
|
21
|
+
void loadHtml(unsigned queryId, QString html);
|
22
|
+
void loadUrl(unsigned queryId, QString url);
|
23
|
+
void finalize(unsigned queryId);
|
24
|
+
|
25
|
+
private slots:
|
26
|
+
void attachAPI();
|
27
|
+
|
28
|
+
private:
|
29
|
+
QMap<unsigned, QWebPage *> _webPages;
|
30
|
+
|
31
|
+
QWebPage *initializeWebPage(unsigned queryId);
|
32
|
+
};
|
33
|
+
|
34
|
+
#endif // SUNSCRAPERTHREAD_H
|
data/lib/sunscraper.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'sunscraper/library'
|
2
|
+
|
3
|
+
# Sunscraper loads an HTML page in a headless browser and waits for the
# `Sunscraper.finish()` JavaScript method to be called. It blocks the calling
# thread, but is threadsafe, does not acquire GIL and thus can be called from
# multiple threads simultaneously.
module Sunscraper
  # ScrapeTimeout error is raised when the page could not be loaded fast enough.
  class ScrapeTimeout < StandardError; end

  class << self
    # Scrape inline HTML. The content is loaded without a particular base URL.
    # If your application depends on base URL being available, use {scrape_url}.
    #
    # @param [String] html the markup to load
    # @param [Integer] timeout timeout in milliseconds
    # @return [String] the page's HTML at the moment `Sunscraper.finish()` was called
    # @raise [ScrapeTimeout] if the callback was not invoked within +timeout+
    def scrape_html(html, timeout=5000)
      scrape(timeout) do |context|
        Library.load_html context, html
      end
    end

    # Scrape a URL.
    #
    # @param [String] url the address to load
    # @param [Integer] timeout timeout in milliseconds
    # @return [String] the page's HTML at the moment `Sunscraper.finish()` was called
    # @raise [ScrapeTimeout] if the callback was not invoked within +timeout+
    def scrape_url(url, timeout=5000)
      scrape(timeout) do |context|
        Library.load_url context, url
      end
    end

    private

    # Runs one scrape: creates a native context, yields it so the caller can
    # start a load, waits for completion and returns the fetched markup.
    # The context is always discarded, even when an error is raised.
    #
    # @param [Integer] timeout timeout in milliseconds
    # @yield [context] the freshly created native context
    # @return [String]
    # @raise [ScrapeTimeout]
    def scrape(timeout)
      context = Library.create

      yield context

      Library.wait(context, timeout)

      data = Library.fetch(context)

      if data == "!SUNSCRAPER_TIMEOUT"
        raise ScrapeTimeout, "Sunscraper has timed out waiting for the callback"
      end

      # The native side encodes the result as UTF-8; tag the string accordingly
      # in case the FFI layer handed it back without that encoding. The
      # respond_to? check keeps engines without String#force_encoding working.
      data.force_encoding(Encoding::UTF_8) if data.respond_to?(:force_encoding)

      data
    ensure
      Library.discard(context) if context
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Refuse to load on Ruby MRI 1.8.x. MRI is recognised either by RUBY_ENGINE
# being undefined or by it being 'ruby', so that 1.8 builds which do define
# the constant are rejected as well, while other engines reporting a 1.8
# RUBY_VERSION are let through.
if (!defined?(RUBY_ENGINE) || RUBY_ENGINE == 'ruby') && RUBY_VERSION =~ /\A1\.8\./
  raise RuntimeError, "Sunscraper does not work on Ruby MRI 1.8.x."
end
|
4
|
+
|
5
|
+
require 'ffi'
|
6
|
+
|
7
|
+
# @private
#
# FFI bindings to the native libsunscraper shared library built in `ext/`.
module Sunscraper::Library
  extend FFI::Library

  # RbConfig sniffing does not work on JRuby.
  extension =
    if Gem.win_platform?
      'dll'
    elsif RUBY_PLATFORM =~ /darwin/i
      'dylib'
    else
      'so'
    end

  # Use the activated gem's directory when there is one; otherwise (e.g. the
  # library was required straight from a checkout) resolve the root relative
  # to this file, which lives in lib/sunscraper/.
  gem_spec = Gem.loaded_specs['sunscraper']
  gem_root = gem_spec ? gem_spec.full_gem_path : File.expand_path('../../..', __FILE__)

  ffi_lib File.join(gem_root, 'ext', "libsunscraper.#{extension}")

  attach_function 'create',    :sunscraper_create,    [],                   :pointer
  attach_function 'load_html', :sunscraper_load_html, [:pointer, :string],  :void
  attach_function 'load_url',  :sunscraper_load_url,  [:pointer, :string],  :void
  attach_function 'fetch',     :sunscraper_fetch,     [:pointer],           :string
  attach_function 'discard',   :sunscraper_discard,   [:pointer],           :void

  if RUBY_ENGINE == 'ruby'
    # MRI uses ffi gem and has GVL. Hence, it needs a rb_thread_blocking_region call.
    attach_function 'wait', :sunscraper_wait, [:pointer, :uint], :void, :blocking => true
  else
    # Rubinius has no GVL, nor does its attach_function accept options.
    # Same for JRuby.
    attach_function 'wait', :sunscraper_wait, [:pointer, :uint], :void
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'webrick'
|
4
|
+
|
5
|
+
HTML = <<HTML
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<script type="text/javascript">
|
9
|
+
document.addEventListener("DOMContentLoaded", function() {
|
10
|
+
document.getElementById('fuga').textContent =
|
11
|
+
("!skrow tI").split("").reverse().join("");
|
12
|
+
Sunscraper.finish();
|
13
|
+
}, true);
|
14
|
+
</script>
|
15
|
+
</head>
|
16
|
+
<body>
|
17
|
+
<div id='fuga'></div>
|
18
|
+
</body>
|
19
|
+
</html>
|
20
|
+
HTML
|
21
|
+
|
22
|
+
PORT = 45555
|
23
|
+
|
24
|
+
describe Sunscraper do
  it "can scrape an HTML provided as a string" do
    Sunscraper.scrape_html(HTML).should include('It works!')
  end

  it "can scrape an URL" do
    server = WEBrick::HTTPServer.new :Port => PORT, :Logger => WEBrick::Log.new('/dev/null'), :AccessLog => []
    server.mount_proc '/' do |req, res|
      res.body = HTML
    end
    Thread.new { server.start }

    begin
      Sunscraper.scrape_url("http://localhost:#{PORT}/").should include('It works!')
    ensure
      # Stop the server even when the expectation fails, so the port is not
      # left bound for the rest of the run.
      server.stop
    end
  end

  it "should time out if callback is not called" do
    lambda { Sunscraper.scrape_html("<!-- nothing. at least no callbacks -->", 1000) }.
          should raise_exception(Sunscraper::ScrapeTimeout)
  end
end
|
data/sunscraper.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = "sunscraper"
|
6
|
+
s.version = "1.0.0"
|
7
|
+
s.authors = ["Peter Zotov"]
|
8
|
+
s.email = ["whitequark@whitequark.org"]
|
9
|
+
s.homepage = "http://github.com/roundlake/sunscraper"
|
10
|
+
s.summary = %q{A WebKit-based, JavaScript-capable HTML scraper.}
|
11
|
+
s.description = s.summary
|
12
|
+
|
13
|
+
s.rubyforge_project = "sunscraper"
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
+
s.extensions = ["ext/extconf.rb"]
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
s.add_development_dependency "rspec"
|
22
|
+
s.add_runtime_dependency "ffi", '>= 1.0.11'
|
23
|
+
end
|
metadata
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sunscraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 540260530
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Peter Zotov
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-02-18 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 881230260
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: ffi
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
hash: 716237343
|
43
|
+
segments:
|
44
|
+
- 1
|
45
|
+
- 0
|
46
|
+
- 11
|
47
|
+
version: 1.0.11
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
description: A WebKit-based, JavaScript-capable HTML scraper.
|
51
|
+
email:
|
52
|
+
- whitequark@whitequark.org
|
53
|
+
executables: []
|
54
|
+
|
55
|
+
extensions:
|
56
|
+
- ext/extconf.rb
|
57
|
+
extra_rdoc_files: []
|
58
|
+
|
59
|
+
files:
|
60
|
+
- .gitignore
|
61
|
+
- .rspec
|
62
|
+
- .yardopts
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE
|
65
|
+
- README.md
|
66
|
+
- Rakefile
|
67
|
+
- ext/.gitignore
|
68
|
+
- ext/Makefile
|
69
|
+
- ext/extconf.rb
|
70
|
+
- ext/sunscraper.cpp
|
71
|
+
- ext/sunscraper.h
|
72
|
+
- ext/sunscraper.pro
|
73
|
+
- ext/sunscraperexternal.cpp
|
74
|
+
- ext/sunscraperlibrary.cpp
|
75
|
+
- ext/sunscraperlibrary.h
|
76
|
+
- ext/sunscraperproxy.cpp
|
77
|
+
- ext/sunscraperproxy.h
|
78
|
+
- ext/sunscraperthread.cpp
|
79
|
+
- ext/sunscraperthread.h
|
80
|
+
- lib/sunscraper.rb
|
81
|
+
- lib/sunscraper/library.rb
|
82
|
+
- spec/spec_helper.rb
|
83
|
+
- spec/sunscraper_spec.rb
|
84
|
+
- sunscraper.gemspec
|
85
|
+
homepage: http://github.com/roundlake/sunscraper
|
86
|
+
licenses: []
|
87
|
+
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
|
91
|
+
require_paths:
|
92
|
+
- lib
|
93
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
hash: 881230260
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
hash: 881230260
|
108
|
+
segments:
|
109
|
+
- 0
|
110
|
+
version: "0"
|
111
|
+
requirements: []
|
112
|
+
|
113
|
+
rubyforge_project: sunscraper
|
114
|
+
rubygems_version: 1.8.12
|
115
|
+
signing_key:
|
116
|
+
specification_version: 3
|
117
|
+
summary: A WebKit-based, JavaScript-capable HTML scraper.
|
118
|
+
test_files: []
|
119
|
+
|
120
|
+
has_rdoc:
|