sunscraper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/.rspec +2 -0
- data/.yardopts +1 -0
- data/Gemfile +4 -0
- data/LICENSE +19 -0
- data/README.md +68 -0
- data/Rakefile +1 -0
- data/ext/.gitignore +4 -0
- data/ext/Makefile +270 -0
- data/ext/extconf.rb +9 -0
- data/ext/sunscraper.cpp +86 -0
- data/ext/sunscraper.h +45 -0
- data/ext/sunscraper.pro +15 -0
- data/ext/sunscraperexternal.cpp +33 -0
- data/ext/sunscraperlibrary.cpp +25 -0
- data/ext/sunscraperlibrary.h +22 -0
- data/ext/sunscraperproxy.cpp +13 -0
- data/ext/sunscraperproxy.h +24 -0
- data/ext/sunscraperthread.cpp +67 -0
- data/ext/sunscraperthread.h +34 -0
- data/lib/sunscraper.rb +50 -0
- data/lib/sunscraper/library.rb +37 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/sunscraper_spec.rb +45 -0
- data/sunscraper.gemspec +23 -0
- metadata +120 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--no-private --markup markdown - LICENSE
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (C) 2012 Peter Zotov <whitequark@whitequark.org>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
4
|
+
this software and associated documentation files (the "Software"), to deal in
|
5
|
+
the Software without restriction, including without limitation the rights to
|
6
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
7
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
8
|
+
so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
Sunscraper
|
2
|
+
==========
|
3
|
+
|
4
|
+
Sunscraper is a gem for prerendering pages with hashbang URLs like `http://whatever.com/#!/page`.
|
5
|
+
|
6
|
+
It works by loading content in the embedded web browser and waiting for a JavaScript method to be
|
7
|
+
called.
|
8
|
+
|
9
|
+
HTML = %{
|
10
|
+
<html>
|
11
|
+
<head>
|
12
|
+
<script type="text/javascript">
|
13
|
+
document.addEventListener("DOMContentLoaded", function() {
|
14
|
+
document.getElementById('fuga').textContent =
|
15
|
+
("!skrow tI").split("").reverse().join("");
|
16
|
+
Sunscraper.finish();
|
17
|
+
}, true);
|
18
|
+
</script>
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
<div id='fuga'></div>
|
22
|
+
</body>
|
23
|
+
</html>
|
24
|
+
}
|
25
|
+
|
26
|
+
Sunscraper.scrape_html(HTML).include?('It works!') # => true
|
27
|
+
|
28
|
+
See also [documentation][].
|
29
|
+
|
30
|
+
[documentation]: http://rdoc.info/gems/sunscraper/Sunscraper
|
31
|
+
|
32
|
+
Installation
|
33
|
+
------------
|
34
|
+
|
35
|
+
Sunscraper requires the Qt 4.x and QtWebKit packages to be installed on the target system. *Sunscraper is not a Ruby
|
36
|
+
C extension*; it works by building a Qt shared library and loading it through [FFI][].
|
37
|
+
|
38
|
+
[FFI]: http://en.wikipedia.org/wiki/Foreign_Function_Interface
|
39
|
+
|
40
|
+
gem install sunscraper
|
41
|
+
|
42
|
+
Runtime requirements
|
43
|
+
--------------------
|
44
|
+
|
45
|
+
On Linux with Qt versions <= 4.8, Sunscraper requires a running X server and a valid `DISPLAY` environment
|
46
|
+
variable. Consider using [Xvfb][] on a GUI-less production server.
|
47
|
+
|
48
|
+
[Xvfb]: http://www.x.org/releases/X11R7.6/doc/man/man1/Xvfb.1.xhtml
|
49
|
+
|
50
|
+
Compatibility
|
51
|
+
-------------
|
52
|
+
|
53
|
+
Sunscraper should be compatible with all major Ruby implementations on all major operating systems, including
|
54
|
+
Ruby MRI 1.9, JRuby, Rubinius and MacRuby running on GNU/Linux, OS X and Windows.
|
55
|
+
|
56
|
+
JRuby versions up to 1.6.5 are known not to work due to a bug in their FFI library.
|
57
|
+
|
58
|
+
Ruby MRI 1.8 is not supported because it has a braindead threading model and will never be because I don't care.
|
59
|
+
|
60
|
+
Thread safety
|
61
|
+
-------------
|
62
|
+
|
63
|
+
Sunscraper is thread-safe.
|
64
|
+
|
65
|
+
License
|
66
|
+
-------
|
67
|
+
|
68
|
+
Sunscraper is distributed under the terms of the MIT license; see LICENSE in the source distribution.
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/ext/.gitignore
ADDED
data/ext/Makefile
ADDED
@@ -0,0 +1,270 @@
|
|
1
|
+
#############################################################################
|
2
|
+
# Makefile for building: libsunscraper.so.1.0.0
|
3
|
+
# Generated by qmake (2.01a) (Qt 4.7.4) on: Sat Feb 18 05:54:02 2012
|
4
|
+
# Project: sunscraper.pro
|
5
|
+
# Template: lib
|
6
|
+
# Command: /usr/bin/qmake-qt4 -spec /usr/share/qt4/mkspecs/linux-g++ CONFIG+=debug -o Makefile sunscraper.pro
|
7
|
+
#############################################################################
|
8
|
+
|
9
|
+
####### Compiler, tools and options
|
10
|
+
|
11
|
+
CC = gcc
|
12
|
+
CXX = g++
|
13
|
+
DEFINES = -DQT_WEBKIT -DQT_WEBKIT_LIB -DQT_GUI_LIB -DQT_CORE_LIB -DQT_SHARED
|
14
|
+
CFLAGS = -pipe -g -Wall -W -D_REENTRANT -fPIC $(DEFINES)
|
15
|
+
CXXFLAGS = -pipe -g -Wall -W -D_REENTRANT -fPIC $(DEFINES)
|
16
|
+
INCPATH = -I/usr/share/qt4/mkspecs/linux-g++ -I. -I/usr/include/qt4/QtCore -I/usr/include/qt4/QtGui -I/usr/include/qt4/QtWebKit -I/usr/include/qt4 -I.
|
17
|
+
LINK = g++
|
18
|
+
LFLAGS = -shared -Wl,-soname,libsunscraper.so.1
|
19
|
+
LIBS = $(SUBLIBS) -L/usr/lib -lQtWebKit -lQtGui -lQtCore -lpthread
|
20
|
+
AR = ar cqs
|
21
|
+
RANLIB =
|
22
|
+
QMAKE = /usr/bin/qmake-qt4
|
23
|
+
TAR = tar -cf
|
24
|
+
COMPRESS = gzip -9f
|
25
|
+
COPY = cp -f
|
26
|
+
SED = sed
|
27
|
+
COPY_FILE = $(COPY)
|
28
|
+
COPY_DIR = $(COPY) -r
|
29
|
+
STRIP = strip
|
30
|
+
INSTALL_FILE = install -m 644 -p
|
31
|
+
INSTALL_DIR = $(COPY_DIR)
|
32
|
+
INSTALL_PROGRAM = install -m 755 -p
|
33
|
+
DEL_FILE = rm -f
|
34
|
+
SYMLINK = ln -f -s
|
35
|
+
DEL_DIR = rmdir
|
36
|
+
MOVE = mv -f
|
37
|
+
CHK_DIR_EXISTS= test -d
|
38
|
+
MKDIR = mkdir -p
|
39
|
+
|
40
|
+
####### Output directory
|
41
|
+
|
42
|
+
OBJECTS_DIR = ./
|
43
|
+
|
44
|
+
####### Files
|
45
|
+
|
46
|
+
SOURCES = sunscraperlibrary.cpp \
|
47
|
+
sunscraperthread.cpp \
|
48
|
+
sunscraperexternal.cpp \
|
49
|
+
sunscraper.cpp \
|
50
|
+
sunscraperproxy.cpp moc_sunscraperthread.cpp \
|
51
|
+
moc_sunscraper.cpp \
|
52
|
+
moc_sunscraperproxy.cpp
|
53
|
+
OBJECTS = sunscraperlibrary.o \
|
54
|
+
sunscraperthread.o \
|
55
|
+
sunscraperexternal.o \
|
56
|
+
sunscraper.o \
|
57
|
+
sunscraperproxy.o \
|
58
|
+
moc_sunscraperthread.o \
|
59
|
+
moc_sunscraper.o \
|
60
|
+
moc_sunscraperproxy.o
|
61
|
+
DIST = /usr/share/qt4/mkspecs/common/g++.conf \
|
62
|
+
/usr/share/qt4/mkspecs/common/unix.conf \
|
63
|
+
/usr/share/qt4/mkspecs/common/linux.conf \
|
64
|
+
/usr/share/qt4/mkspecs/qconfig.pri \
|
65
|
+
/usr/share/qt4/mkspecs/modules/qt_webkit_version.pri \
|
66
|
+
/usr/share/qt4/mkspecs/features/qt_functions.prf \
|
67
|
+
/usr/share/qt4/mkspecs/features/qt_config.prf \
|
68
|
+
/usr/share/qt4/mkspecs/features/exclusive_builds.prf \
|
69
|
+
/usr/share/qt4/mkspecs/features/default_pre.prf \
|
70
|
+
/usr/share/qt4/mkspecs/features/debug.prf \
|
71
|
+
/usr/share/qt4/mkspecs/features/default_post.prf \
|
72
|
+
/usr/share/qt4/mkspecs/features/warn_on.prf \
|
73
|
+
/usr/share/qt4/mkspecs/features/qt.prf \
|
74
|
+
/usr/share/qt4/mkspecs/features/unix/thread.prf \
|
75
|
+
/usr/share/qt4/mkspecs/features/moc.prf \
|
76
|
+
/usr/share/qt4/mkspecs/features/resources.prf \
|
77
|
+
/usr/share/qt4/mkspecs/features/uic.prf \
|
78
|
+
/usr/share/qt4/mkspecs/features/yacc.prf \
|
79
|
+
/usr/share/qt4/mkspecs/features/lex.prf \
|
80
|
+
/usr/share/qt4/mkspecs/features/include_source_dir.prf \
|
81
|
+
sunscraper.pro
|
82
|
+
QMAKE_TARGET = sunscraper
|
83
|
+
DESTDIR =
|
84
|
+
TARGET = libsunscraper.so.1.0.0
|
85
|
+
TARGETA = libsunscraper.a
|
86
|
+
TARGETD = libsunscraper.so.1.0.0
|
87
|
+
TARGET0 = libsunscraper.so
|
88
|
+
TARGET1 = libsunscraper.so.1
|
89
|
+
TARGET2 = libsunscraper.so.1.0
|
90
|
+
|
91
|
+
first: all
|
92
|
+
####### Implicit rules
|
93
|
+
|
94
|
+
.SUFFIXES: .o .c .cpp .cc .cxx .C
|
95
|
+
|
96
|
+
.cpp.o:
|
97
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
|
98
|
+
|
99
|
+
.cc.o:
|
100
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
|
101
|
+
|
102
|
+
.cxx.o:
|
103
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
|
104
|
+
|
105
|
+
.C.o:
|
106
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
|
107
|
+
|
108
|
+
.c.o:
|
109
|
+
$(CC) -c $(CFLAGS) $(INCPATH) -o "$@" "$<"
|
110
|
+
|
111
|
+
####### Build rules
|
112
|
+
|
113
|
+
all: Makefile $(TARGET)
|
114
|
+
|
115
|
+
$(TARGET): $(OBJECTS) $(SUBLIBS) $(OBJCOMP)
|
116
|
+
-$(DEL_FILE) $(TARGET) $(TARGET0) $(TARGET1) $(TARGET2)
|
117
|
+
$(LINK) $(LFLAGS) -o $(TARGET) $(OBJECTS) $(LIBS) $(OBJCOMP)
|
118
|
+
-ln -s $(TARGET) $(TARGET0)
|
119
|
+
-ln -s $(TARGET) $(TARGET1)
|
120
|
+
-ln -s $(TARGET) $(TARGET2)
|
121
|
+
|
122
|
+
|
123
|
+
|
124
|
+
staticlib: $(TARGETA)
|
125
|
+
|
126
|
+
$(TARGETA): $(OBJECTS) $(OBJCOMP)
|
127
|
+
-$(DEL_FILE) $(TARGETA)
|
128
|
+
$(AR) $(TARGETA) $(OBJECTS)
|
129
|
+
|
130
|
+
Makefile: sunscraper.pro /usr/share/qt4/mkspecs/linux-g++/qmake.conf /usr/share/qt4/mkspecs/common/g++.conf \
|
131
|
+
/usr/share/qt4/mkspecs/common/unix.conf \
|
132
|
+
/usr/share/qt4/mkspecs/common/linux.conf \
|
133
|
+
/usr/share/qt4/mkspecs/qconfig.pri \
|
134
|
+
/usr/share/qt4/mkspecs/modules/qt_webkit_version.pri \
|
135
|
+
/usr/share/qt4/mkspecs/features/qt_functions.prf \
|
136
|
+
/usr/share/qt4/mkspecs/features/qt_config.prf \
|
137
|
+
/usr/share/qt4/mkspecs/features/exclusive_builds.prf \
|
138
|
+
/usr/share/qt4/mkspecs/features/default_pre.prf \
|
139
|
+
/usr/share/qt4/mkspecs/features/debug.prf \
|
140
|
+
/usr/share/qt4/mkspecs/features/default_post.prf \
|
141
|
+
/usr/share/qt4/mkspecs/features/warn_on.prf \
|
142
|
+
/usr/share/qt4/mkspecs/features/qt.prf \
|
143
|
+
/usr/share/qt4/mkspecs/features/unix/thread.prf \
|
144
|
+
/usr/share/qt4/mkspecs/features/moc.prf \
|
145
|
+
/usr/share/qt4/mkspecs/features/resources.prf \
|
146
|
+
/usr/share/qt4/mkspecs/features/uic.prf \
|
147
|
+
/usr/share/qt4/mkspecs/features/yacc.prf \
|
148
|
+
/usr/share/qt4/mkspecs/features/lex.prf \
|
149
|
+
/usr/share/qt4/mkspecs/features/include_source_dir.prf \
|
150
|
+
/usr/lib/libQtWebKit.prl \
|
151
|
+
/usr/lib/libQtGui.prl \
|
152
|
+
/usr/lib/libQtCore.prl
|
153
|
+
$(QMAKE) -spec /usr/share/qt4/mkspecs/linux-g++ CONFIG+=debug -o Makefile sunscraper.pro
|
154
|
+
/usr/share/qt4/mkspecs/common/g++.conf:
|
155
|
+
/usr/share/qt4/mkspecs/common/unix.conf:
|
156
|
+
/usr/share/qt4/mkspecs/common/linux.conf:
|
157
|
+
/usr/share/qt4/mkspecs/qconfig.pri:
|
158
|
+
/usr/share/qt4/mkspecs/modules/qt_webkit_version.pri:
|
159
|
+
/usr/share/qt4/mkspecs/features/qt_functions.prf:
|
160
|
+
/usr/share/qt4/mkspecs/features/qt_config.prf:
|
161
|
+
/usr/share/qt4/mkspecs/features/exclusive_builds.prf:
|
162
|
+
/usr/share/qt4/mkspecs/features/default_pre.prf:
|
163
|
+
/usr/share/qt4/mkspecs/features/debug.prf:
|
164
|
+
/usr/share/qt4/mkspecs/features/default_post.prf:
|
165
|
+
/usr/share/qt4/mkspecs/features/warn_on.prf:
|
166
|
+
/usr/share/qt4/mkspecs/features/qt.prf:
|
167
|
+
/usr/share/qt4/mkspecs/features/unix/thread.prf:
|
168
|
+
/usr/share/qt4/mkspecs/features/moc.prf:
|
169
|
+
/usr/share/qt4/mkspecs/features/resources.prf:
|
170
|
+
/usr/share/qt4/mkspecs/features/uic.prf:
|
171
|
+
/usr/share/qt4/mkspecs/features/yacc.prf:
|
172
|
+
/usr/share/qt4/mkspecs/features/lex.prf:
|
173
|
+
/usr/share/qt4/mkspecs/features/include_source_dir.prf:
|
174
|
+
/usr/lib/libQtWebKit.prl:
|
175
|
+
/usr/lib/libQtGui.prl:
|
176
|
+
/usr/lib/libQtCore.prl:
|
177
|
+
qmake: FORCE
|
178
|
+
@$(QMAKE) -spec /usr/share/qt4/mkspecs/linux-g++ CONFIG+=debug -o Makefile sunscraper.pro
|
179
|
+
|
180
|
+
dist:
|
181
|
+
@$(CHK_DIR_EXISTS) .tmp/sunscraper1.0.0 || $(MKDIR) .tmp/sunscraper1.0.0
|
182
|
+
$(COPY_FILE) --parents $(SOURCES) $(DIST) .tmp/sunscraper1.0.0/ && $(COPY_FILE) --parents sunscraperlibrary.h sunscraperthread.h sunscraper.h sunscraperproxy.h .tmp/sunscraper1.0.0/ && $(COPY_FILE) --parents sunscraperlibrary.cpp sunscraperthread.cpp sunscraperexternal.cpp sunscraper.cpp sunscraperproxy.cpp .tmp/sunscraper1.0.0/ && (cd `dirname .tmp/sunscraper1.0.0` && $(TAR) sunscraper1.0.0.tar sunscraper1.0.0 && $(COMPRESS) sunscraper1.0.0.tar) && $(MOVE) `dirname .tmp/sunscraper1.0.0`/sunscraper1.0.0.tar.gz . && $(DEL_FILE) -r .tmp/sunscraper1.0.0
|
183
|
+
|
184
|
+
|
185
|
+
clean:compiler_clean
|
186
|
+
-$(DEL_FILE) $(OBJECTS)
|
187
|
+
-$(DEL_FILE) *~ core *.core
|
188
|
+
|
189
|
+
|
190
|
+
####### Sub-libraries
|
191
|
+
|
192
|
+
distclean: clean
|
193
|
+
-$(DEL_FILE) $(TARGET)
|
194
|
+
-$(DEL_FILE) $(TARGET0) $(TARGET1) $(TARGET2) $(TARGETA)
|
195
|
+
-$(DEL_FILE) Makefile
|
196
|
+
|
197
|
+
|
198
|
+
check: first
|
199
|
+
|
200
|
+
mocclean: compiler_moc_header_clean compiler_moc_source_clean
|
201
|
+
|
202
|
+
mocables: compiler_moc_header_make_all compiler_moc_source_make_all
|
203
|
+
|
204
|
+
compiler_moc_header_make_all: moc_sunscraperthread.cpp moc_sunscraper.cpp moc_sunscraperproxy.cpp
|
205
|
+
compiler_moc_header_clean:
|
206
|
+
-$(DEL_FILE) moc_sunscraperthread.cpp moc_sunscraper.cpp moc_sunscraperproxy.cpp
|
207
|
+
moc_sunscraperthread.cpp: sunscraperthread.h
|
208
|
+
/usr/bin/moc-qt4 $(DEFINES) $(INCPATH) sunscraperthread.h -o moc_sunscraperthread.cpp
|
209
|
+
|
210
|
+
moc_sunscraper.cpp: sunscraper.h
|
211
|
+
/usr/bin/moc-qt4 $(DEFINES) $(INCPATH) sunscraper.h -o moc_sunscraper.cpp
|
212
|
+
|
213
|
+
moc_sunscraperproxy.cpp: sunscraperproxy.h
|
214
|
+
/usr/bin/moc-qt4 $(DEFINES) $(INCPATH) sunscraperproxy.h -o moc_sunscraperproxy.cpp
|
215
|
+
|
216
|
+
compiler_rcc_make_all:
|
217
|
+
compiler_rcc_clean:
|
218
|
+
compiler_image_collection_make_all: qmake_image_collection.cpp
|
219
|
+
compiler_image_collection_clean:
|
220
|
+
-$(DEL_FILE) qmake_image_collection.cpp
|
221
|
+
compiler_moc_source_make_all:
|
222
|
+
compiler_moc_source_clean:
|
223
|
+
compiler_uic_make_all:
|
224
|
+
compiler_uic_clean:
|
225
|
+
compiler_yacc_decl_make_all:
|
226
|
+
compiler_yacc_decl_clean:
|
227
|
+
compiler_yacc_impl_make_all:
|
228
|
+
compiler_yacc_impl_clean:
|
229
|
+
compiler_lex_make_all:
|
230
|
+
compiler_lex_clean:
|
231
|
+
compiler_clean: compiler_moc_header_clean
|
232
|
+
|
233
|
+
####### Compile
|
234
|
+
|
235
|
+
sunscraperlibrary.o: sunscraperlibrary.cpp sunscraperlibrary.h \
|
236
|
+
sunscraperthread.h
|
237
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraperlibrary.o sunscraperlibrary.cpp
|
238
|
+
|
239
|
+
sunscraperthread.o: sunscraperthread.cpp sunscraperthread.h \
|
240
|
+
sunscraperproxy.h
|
241
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraperthread.o sunscraperthread.cpp
|
242
|
+
|
243
|
+
sunscraperexternal.o: sunscraperexternal.cpp sunscraper.h
|
244
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraperexternal.o sunscraperexternal.cpp
|
245
|
+
|
246
|
+
sunscraper.o: sunscraper.cpp sunscraper.h \
|
247
|
+
sunscraperlibrary.h \
|
248
|
+
sunscraperthread.h
|
249
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraper.o sunscraper.cpp
|
250
|
+
|
251
|
+
sunscraperproxy.o: sunscraperproxy.cpp sunscraperproxy.h
|
252
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o sunscraperproxy.o sunscraperproxy.cpp
|
253
|
+
|
254
|
+
moc_sunscraperthread.o: moc_sunscraperthread.cpp
|
255
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o moc_sunscraperthread.o moc_sunscraperthread.cpp
|
256
|
+
|
257
|
+
moc_sunscraper.o: moc_sunscraper.cpp
|
258
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o moc_sunscraper.o moc_sunscraper.cpp
|
259
|
+
|
260
|
+
moc_sunscraperproxy.o: moc_sunscraperproxy.cpp
|
261
|
+
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o moc_sunscraperproxy.o moc_sunscraperproxy.cpp
|
262
|
+
|
263
|
+
####### Install
|
264
|
+
|
265
|
+
install: FORCE
|
266
|
+
|
267
|
+
uninstall: FORCE
|
268
|
+
|
269
|
+
FORCE:
|
270
|
+
|
data/ext/extconf.rb
ADDED
data/ext/sunscraper.cpp
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QThread>
|
3
|
+
#include <QTimer>
|
4
|
+
#include <QWebPage>
|
5
|
+
#include <QWebFrame>
|
6
|
+
#include <QMutexLocker>
|
7
|
+
#include <QtDebug>
|
8
|
+
#include "sunscraper.h"
|
9
|
+
#include "sunscraperlibrary.h"
|
10
|
+
#include "sunscraperthread.h"
|
11
|
+
|
12
|
+
unsigned Sunscraper::_nextQueryId = 1;
|
13
|
+
QMutex Sunscraper::_staticMutex;
|
14
|
+
|
15
|
+
Sunscraper::Sunscraper()
|
16
|
+
{
|
17
|
+
QMutexLocker locker(&_staticMutex);
|
18
|
+
|
19
|
+
_queryId = _nextQueryId++;
|
20
|
+
|
21
|
+
SunscraperThread *worker = SunscraperLibrary::instance()->thread();
|
22
|
+
|
23
|
+
connect(this, SIGNAL(requestLoadHtml(uint,QString)),
|
24
|
+
worker, SLOT(loadHtml(uint,QString)), Qt::QueuedConnection);
|
25
|
+
connect(this, SIGNAL(requestLoadUrl(uint,QString)),
|
26
|
+
worker, SLOT(loadUrl(uint,QString)), Qt::QueuedConnection);
|
27
|
+
connect(this, SIGNAL(requestFinalize(uint)),
|
28
|
+
worker, SLOT(finalize(uint)), Qt::QueuedConnection);
|
29
|
+
|
30
|
+
connect(worker, SIGNAL(finished(uint,QString)),
|
31
|
+
this, SLOT(finished(uint,QString)), Qt::QueuedConnection);
|
32
|
+
}
|
33
|
+
|
34
|
+
void Sunscraper::loadHtml(QString html)
|
35
|
+
{
|
36
|
+
emit requestLoadHtml(_queryId, html);
|
37
|
+
}
|
38
|
+
|
39
|
+
void Sunscraper::loadUrl(QString url)
|
40
|
+
{
|
41
|
+
emit requestLoadUrl(_queryId, url);
|
42
|
+
}
|
43
|
+
|
44
|
+
void Sunscraper::wait(unsigned timeout)
|
45
|
+
{
|
46
|
+
QTimer _timeoutTimer;
|
47
|
+
connect(&_timeoutTimer, SIGNAL(timeout()), this, SLOT(timeout()));
|
48
|
+
|
49
|
+
_timeoutTimer.setInterval(timeout);
|
50
|
+
_timeoutTimer.start();
|
51
|
+
|
52
|
+
_eventLoop.exec();
|
53
|
+
|
54
|
+
_timeoutTimer.stop();
|
55
|
+
}
|
56
|
+
|
57
|
+
void Sunscraper::finished(unsigned eventQueryId, QString html)
|
58
|
+
{
|
59
|
+
if(eventQueryId != _queryId)
|
60
|
+
return;
|
61
|
+
|
62
|
+
_eventLoop.quit();
|
63
|
+
|
64
|
+
_html = html.toUtf8();
|
65
|
+
|
66
|
+
emit requestFinalize(_queryId);
|
67
|
+
}
|
68
|
+
|
69
|
+
void Sunscraper::timeout()
|
70
|
+
{
|
71
|
+
_eventLoop.quit();
|
72
|
+
|
73
|
+
_html = "!SUNSCRAPER_TIMEOUT";
|
74
|
+
|
75
|
+
emit requestFinalize(_queryId);
|
76
|
+
}
|
77
|
+
|
78
|
+
QByteArray Sunscraper::fetch()
|
79
|
+
{
|
80
|
+
return _html;
|
81
|
+
}
|
82
|
+
|
83
|
+
const char *Sunscraper::fetchAsCString()
|
84
|
+
{
|
85
|
+
return _html.constData();
|
86
|
+
}
|
data/ext/sunscraper.h
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
#ifndef SUNSCRAPER_H
|
2
|
+
#define SUNSCRAPER_H
|
3
|
+
|
4
|
+
#include <QObject>
|
5
|
+
#include <QString>
|
6
|
+
#include <QMutex>
|
7
|
+
#include <QByteArray>
|
8
|
+
#include <QEventLoop>
|
9
|
+
|
10
|
+
class QWebPage;
|
11
|
+
|
12
|
+
class Sunscraper : public QObject
|
13
|
+
{
|
14
|
+
Q_OBJECT
|
15
|
+
|
16
|
+
public:
|
17
|
+
Sunscraper();
|
18
|
+
|
19
|
+
void loadHtml(QString html);
|
20
|
+
void loadUrl(QString url);
|
21
|
+
|
22
|
+
void wait(unsigned timeout);
|
23
|
+
|
24
|
+
QByteArray fetch();
|
25
|
+
const char *fetchAsCString();
|
26
|
+
|
27
|
+
private slots:
|
28
|
+
void finished(unsigned queryId, QString html);
|
29
|
+
void timeout();
|
30
|
+
|
31
|
+
signals:
|
32
|
+
void requestLoadHtml(unsigned queryId, QString html);
|
33
|
+
void requestLoadUrl(unsigned queryId, QString html);
|
34
|
+
void requestFinalize(unsigned queryId);
|
35
|
+
|
36
|
+
private:
|
37
|
+
static unsigned _nextQueryId;
|
38
|
+
static QMutex _staticMutex;
|
39
|
+
|
40
|
+
unsigned _queryId;
|
41
|
+
QEventLoop _eventLoop;
|
42
|
+
QByteArray _html;
|
43
|
+
};
|
44
|
+
|
45
|
+
#endif // SUNSCRAPER_H
|
data/ext/sunscraper.pro
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
QT += webkit
|
2
|
+
|
3
|
+
TARGET = sunscraper
|
4
|
+
TEMPLATE = lib
|
5
|
+
|
6
|
+
SOURCES += sunscraperlibrary.cpp \
|
7
|
+
sunscraperthread.cpp \
|
8
|
+
sunscraperexternal.cpp \
|
9
|
+
sunscraper.cpp \
|
10
|
+
sunscraperproxy.cpp
|
11
|
+
|
12
|
+
HEADERS += sunscraperlibrary.h \
|
13
|
+
sunscraperthread.h \
|
14
|
+
sunscraper.h \
|
15
|
+
sunscraperproxy.h
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#include "sunscraper.h"
|
2
|
+
|
3
|
+
extern "C" {
|
4
|
+
Sunscraper *sunscraper_create()
|
5
|
+
{
|
6
|
+
return new Sunscraper();
|
7
|
+
}
|
8
|
+
|
9
|
+
void sunscraper_load_html(Sunscraper *sunscraper, const char *html)
|
10
|
+
{
|
11
|
+
sunscraper->loadHtml(html);
|
12
|
+
}
|
13
|
+
|
14
|
+
void sunscraper_load_url(Sunscraper *sunscraper, const char *url)
|
15
|
+
{
|
16
|
+
sunscraper->loadUrl(url);
|
17
|
+
}
|
18
|
+
|
19
|
+
void sunscraper_wait(Sunscraper *sunscraper, unsigned timeout)
|
20
|
+
{
|
21
|
+
sunscraper->wait(timeout);
|
22
|
+
}
|
23
|
+
|
24
|
+
const char *sunscraper_fetch(Sunscraper *sunscraper)
|
25
|
+
{
|
26
|
+
return sunscraper->fetchAsCString();
|
27
|
+
}
|
28
|
+
|
29
|
+
void sunscraper_discard(Sunscraper *sunscraper)
|
30
|
+
{
|
31
|
+
delete sunscraper;
|
32
|
+
}
|
33
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#include "sunscraperlibrary.h"
|
2
|
+
#include "sunscraperthread.h"
|
3
|
+
|
4
|
+
SunscraperLibrary SunscraperLibrary::_instance;
|
5
|
+
|
6
|
+
SunscraperLibrary::SunscraperLibrary()
|
7
|
+
{
|
8
|
+
_apartmentThread = new SunscraperThread();
|
9
|
+
_apartmentThread->start();
|
10
|
+
}
|
11
|
+
|
12
|
+
SunscraperLibrary::~SunscraperLibrary()
|
13
|
+
{
|
14
|
+
/* Do nothing. This is on purpose. */
|
15
|
+
}
|
16
|
+
|
17
|
+
SunscraperLibrary *SunscraperLibrary::instance()
|
18
|
+
{
|
19
|
+
return &_instance;
|
20
|
+
}
|
21
|
+
|
22
|
+
SunscraperThread *SunscraperLibrary::thread()
|
23
|
+
{
|
24
|
+
return _apartmentThread;
|
25
|
+
}
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#ifndef SUNSCRAPERLIBRARY_H
|
2
|
+
#define SUNSCRAPERLIBRARY_H
|
3
|
+
|
4
|
+
class SunscraperThread;
|
5
|
+
|
6
|
+
class SunscraperLibrary {
|
7
|
+
public:
|
8
|
+
static SunscraperLibrary *instance();
|
9
|
+
|
10
|
+
SunscraperThread *thread();
|
11
|
+
|
12
|
+
private:
|
13
|
+
SunscraperLibrary();
|
14
|
+
SunscraperLibrary(SunscraperLibrary &);
|
15
|
+
~SunscraperLibrary();
|
16
|
+
|
17
|
+
static SunscraperLibrary _instance;
|
18
|
+
|
19
|
+
SunscraperThread *_apartmentThread;
|
20
|
+
};
|
21
|
+
|
22
|
+
#endif // SUNSCRAPERLIBRARY_H
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#include <QWebPage>
|
2
|
+
#include <QWebFrame>
|
3
|
+
#include "sunscraperproxy.h"
|
4
|
+
|
5
|
+
SunscraperProxy::SunscraperProxy(QWebPage *parent, unsigned queryId) :
|
6
|
+
QObject(parent), _webPage(parent), _queryId(queryId)
|
7
|
+
{
|
8
|
+
}
|
9
|
+
|
10
|
+
void SunscraperProxy::finish()
|
11
|
+
{
|
12
|
+
emit finished(_queryId, _webPage->mainFrame()->toHtml());
|
13
|
+
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef SUNSCRAPERPROXY_H
|
2
|
+
#define SUNSCRAPERPROXY_H
|
3
|
+
|
4
|
+
#include <QObject>
|
5
|
+
|
6
|
+
class QWebPage;
|
7
|
+
|
8
|
+
class SunscraperProxy : public QObject
|
9
|
+
{
|
10
|
+
Q_OBJECT
|
11
|
+
public:
|
12
|
+
SunscraperProxy(QWebPage *parent, unsigned queryId);
|
13
|
+
|
14
|
+
Q_INVOKABLE void finish();
|
15
|
+
|
16
|
+
signals:
|
17
|
+
void finished(unsigned _queryId, QString html);
|
18
|
+
|
19
|
+
private:
|
20
|
+
QWebPage *_webPage;
|
21
|
+
unsigned _queryId;
|
22
|
+
};
|
23
|
+
|
24
|
+
#endif // SUNSCRAPERPROXY_H
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QWebPage>
|
3
|
+
#include <QWebFrame>
|
4
|
+
#include "sunscraperthread.h"
|
5
|
+
#include "sunscraperproxy.h"
|
6
|
+
|
7
|
+
SunscraperThread::SunscraperThread()
|
8
|
+
{
|
9
|
+
}
|
10
|
+
|
11
|
+
void SunscraperThread::run()
|
12
|
+
{
|
13
|
+
static int argc;
|
14
|
+
static char **argv = {NULL};
|
15
|
+
|
16
|
+
QApplication app(argc, argv);
|
17
|
+
app.exec();
|
18
|
+
|
19
|
+
qFatal("Sunscraper apartment thread event loop should never end");
|
20
|
+
}
|
21
|
+
|
22
|
+
void SunscraperThread::loadHtml(unsigned queryId, QString html)
|
23
|
+
{
|
24
|
+
QWebPage *webPage = initializeWebPage(queryId);
|
25
|
+
webPage->mainFrame()->setHtml(html);
|
26
|
+
}
|
27
|
+
|
28
|
+
void SunscraperThread::loadUrl(unsigned queryId, QString url)
|
29
|
+
{
|
30
|
+
QWebPage *webPage = initializeWebPage(queryId);
|
31
|
+
webPage->mainFrame()->load(url);
|
32
|
+
}
|
33
|
+
|
34
|
+
void SunscraperThread::finalize(unsigned queryId)
|
35
|
+
{
|
36
|
+
Q_ASSERT(_webPages[queryId] != NULL);
|
37
|
+
|
38
|
+
_webPages[queryId]->deleteLater();
|
39
|
+
_webPages.remove(queryId);
|
40
|
+
}
|
41
|
+
|
42
|
+
/*
 * Creates the QWebPage that serves `queryId`, registers it in _webPages and
 * arranges for attachAPI() to run whenever the main frame clears its
 * JavaScript window object.
 *
 * Returns the new page, which is parented to this thread object.
 */
QWebPage *SunscraperThread::initializeWebPage(unsigned queryId)
{
    /* value() does not insert, so the check has no side effect on the map
       and behaves the same whether or not assertions are compiled in. */
    Q_ASSERT(_webPages.value(queryId) == NULL);

    QWebPage *webPage = new QWebPage(this);
    connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
            this, SLOT(attachAPI()));

    _webPages[queryId] = webPage;

    return webPage;
}
|
54
|
+
|
55
|
+
void SunscraperThread::attachAPI()
|
56
|
+
{
|
57
|
+
QWebFrame *origin = static_cast<QWebFrame *>(QObject::sender());
|
58
|
+
QWebPage *page = origin->page();
|
59
|
+
|
60
|
+
unsigned queryId = _webPages.key(page, 0);
|
61
|
+
Q_ASSERT(queryId != 0);
|
62
|
+
|
63
|
+
SunscraperProxy *proxy = new SunscraperProxy(page, queryId);
|
64
|
+
connect(proxy, SIGNAL(finished(uint,QString)), this, SIGNAL(finished(uint,QString)));
|
65
|
+
|
66
|
+
origin->addToJavaScriptWindowObject("Sunscraper", proxy, QScriptEngine::QtOwnership);
|
67
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#ifndef SUNSCRAPERTHREAD_H
|
2
|
+
#define SUNSCRAPERTHREAD_H
|
3
|
+
|
4
|
+
#include <QThread>
|
5
|
+
#include <QMap>
|
6
|
+
|
7
|
+
class QWebPage;
|
8
|
+
|
9
|
+
class SunscraperThread : public QThread
|
10
|
+
{
|
11
|
+
Q_OBJECT
|
12
|
+
public:
|
13
|
+
SunscraperThread();
|
14
|
+
|
15
|
+
void run();
|
16
|
+
|
17
|
+
signals:
|
18
|
+
void finished(unsigned queryId, QString result);
|
19
|
+
|
20
|
+
public slots:
|
21
|
+
void loadHtml(unsigned queryId, QString html);
|
22
|
+
void loadUrl(unsigned queryId, QString url);
|
23
|
+
void finalize(unsigned queryId);
|
24
|
+
|
25
|
+
private slots:
|
26
|
+
void attachAPI();
|
27
|
+
|
28
|
+
private:
|
29
|
+
QMap<unsigned, QWebPage *> _webPages;
|
30
|
+
|
31
|
+
QWebPage *initializeWebPage(unsigned queryId);
|
32
|
+
};
|
33
|
+
|
34
|
+
#endif // SUNSCRAPERTHREAD_H
|
data/lib/sunscraper.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'sunscraper/library'
|
2
|
+
|
3
|
+
# Sunscraper loads an HTML page in a headless browser and waits for the page to
# call the JavaScript method `Sunscraper.finish()`. The calling thread is
# blocked while waiting; the original author documents the module as
# thread-safe and callable from several threads simultaneously.
module Sunscraper
  # Raised when the page did not call `Sunscraper.finish()` before the timeout
  # expired.
  class ScrapeTimeout < StandardError; end

  class << self
    # Scrape an inline HTML document. The content is loaded without a
    # particular base URL. If your application depends on a base URL being
    # available, use {scrape_url}.
    #
    # @param [String] html the document to load
    # @param [Integer] timeout timeout in milliseconds
    # @return [String] the resulting page HTML, tagged as UTF-8
    # @raise [ScrapeTimeout] if the page does not finish in time
    def scrape_html(html, timeout=5000)
      scrape(timeout) { |context| Library.load_html(context, html) }
    end

    # Scrape a URL.
    #
    # @param [String] url the address to load
    # @param [Integer] timeout timeout in milliseconds
    # @return [String] the resulting page HTML, tagged as UTF-8
    # @raise [ScrapeTimeout] if the page does not finish in time
    def scrape_url(url, timeout=5000)
      scrape(timeout) { |context| Library.load_url(context, url) }
    end

    private

    # Creates a native context, lets the block start a load on it, waits for
    # completion and returns the fetched HTML. The context is always
    # discarded, including when the block or the wait raises.
    def scrape(timeout)
      context = Library.create

      yield context

      Library.wait(context, timeout)

      data = Library.fetch(context)

      # The native side reports a timeout in-band with this marker string.
      if data == "!SUNSCRAPER_TIMEOUT"
        raise ScrapeTimeout, "Sunscraper has timed out waiting for the callback"
      end

      # The native library hands back UTF-8 bytes; tag a copy accordingly so
      # callers can match it against non-ASCII UTF-8 strings.
      # NOTE(review): assumes FFI returns the string with a binary encoding tag.
      data && data.dup.force_encoding(Encoding::UTF_8)
    ensure
      Library.discard(context) if context
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Refuse to load on Ruby MRI 1.8. The check fires only when RUBY_ENGINE is
# undefined and the version string starts with "1.8."; the dot is escaped and
# the pattern is anchored to the start of the string so nothing else matches.
if !defined?(RUBY_ENGINE) && RUBY_VERSION =~ /\A1\.8\./
  raise RuntimeError, "Sunscraper does not work on Ruby MRI 1.8.x."
end
|
4
|
+
|
5
|
+
require 'ffi'
|
6
|
+
|
7
|
+
# The namespace is opened with nested `module` keywords rather than
# `module Sunscraper::Library`: lib/sunscraper.rb requires this file before it
# defines Sunscraper, so the compact form would raise NameError.
module Sunscraper
  # @private
  #
  # FFI bindings to the native libsunscraper shared library built in `ext/`.
  module Library
    extend FFI::Library

    # RbConfig sniffing does not work on JRuby, so the shared library suffix
    # is derived from the platform instead.
    extension =
      if Gem.win_platform?
        'dll'
      elsif RUBY_PLATFORM =~ /darwin/i
        'dylib'
      else
        'so'
      end

    # Prefer the activated gem's root. When the library is loaded without
    # RubyGems activation (for example from a source checkout) no spec is
    # registered, so fall back to the directory three levels above this file.
    loaded_spec = Gem.loaded_specs['sunscraper']
    gem_root    = loaded_spec ? loaded_spec.full_gem_path : File.expand_path('../../..', __FILE__)

    ffi_lib File.join(gem_root, 'ext', "libsunscraper.#{extension}")

    attach_function 'create',    :sunscraper_create,    [],                  :pointer
    attach_function 'load_html', :sunscraper_load_html, [:pointer, :string], :void
    attach_function 'load_url',  :sunscraper_load_url,  [:pointer, :string], :void
    attach_function 'fetch',     :sunscraper_fetch,     [:pointer],          :string
    attach_function 'discard',   :sunscraper_discard,   [:pointer],          :void

    if RUBY_ENGINE == 'ruby'
      # MRI uses the ffi gem and has a GVL, so the long-running wait is
      # attached as a blocking call (rb_thread_blocking_region).
      attach_function 'wait', :sunscraper_wait, [:pointer, :uint], :void, :blocking => true
    else
      # Rubinius has no GVL, and its attach_function takes no options.
      # The same applies to JRuby.
      attach_function 'wait', :sunscraper_wait, [:pointer, :uint], :void
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'

require 'webrick'

# Fixture page: fills in #fuga on DOMContentLoaded and then signals completion
# through Sunscraper.finish().
HTML = <<HTML
<html>
<head>
<script type="text/javascript">
document.addEventListener("DOMContentLoaded", function() {
document.getElementById('fuga').textContent =
("!skrow tI").split("").reverse().join("");
Sunscraper.finish();
}, true);
</script>
</head>
<body>
<div id='fuga'></div>
</body>
</html>
HTML

# Local port used by the throwaway WEBrick server in the URL example.
PORT = 45555

describe Sunscraper do
  it "can scrape an HTML provided as a string" do
    Sunscraper.scrape_html(HTML).should include('It works!')
  end

  it "can scrape an URL" do
    server = WEBrick::HTTPServer.new :Port => PORT, :Logger => WEBrick::Log.new('/dev/null'), :AccessLog => []
    server.mount_proc '/' do |req, res|
      res.body = HTML
    end
    Thread.new { server.start }

    begin
      Sunscraper.scrape_url("http://localhost:#{PORT}/").should include('It works!')
    ensure
      # Shut the server down even when the expectation fails, so the port is
      # released for later examples and runs.
      server.shutdown
    end
  end

  it "should time out if callback is not called" do
    lambda { Sunscraper.scrape_html("<!-- nothing. at least no callbacks -->", 1000) }.
      should raise_exception(Sunscraper::ScrapeTimeout)
  end
end
|
data/sunscraper.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |s|
|
5
|
+
s.name = "sunscraper"
|
6
|
+
s.version = "1.0.0"
|
7
|
+
s.authors = ["Peter Zotov"]
|
8
|
+
s.email = ["whitequark@whitequark.org"]
|
9
|
+
s.homepage = "http://github.com/roundlake/sunscraper"
|
10
|
+
s.summary = %q{A WebKit-based, JavaScript-capable HTML scraper.}
|
11
|
+
s.description = s.summary
|
12
|
+
|
13
|
+
s.rubyforge_project = "sunscraper"
|
14
|
+
|
15
|
+
s.files = `git ls-files`.split("\n")
|
16
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
17
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
|
+
s.extensions = ["ext/extconf.rb"]
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
s.add_development_dependency "rspec"
|
22
|
+
s.add_runtime_dependency "ffi", '>= 1.0.11'
|
23
|
+
end
|
metadata
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sunscraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 540260530
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Peter Zotov
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-02-18 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 881230260
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: ffi
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
hash: 716237343
|
43
|
+
segments:
|
44
|
+
- 1
|
45
|
+
- 0
|
46
|
+
- 11
|
47
|
+
version: 1.0.11
|
48
|
+
type: :runtime
|
49
|
+
version_requirements: *id002
|
50
|
+
description: A WebKit-based, JavaScript-capable HTML scraper.
|
51
|
+
email:
|
52
|
+
- whitequark@whitequark.org
|
53
|
+
executables: []
|
54
|
+
|
55
|
+
extensions:
|
56
|
+
- ext/extconf.rb
|
57
|
+
extra_rdoc_files: []
|
58
|
+
|
59
|
+
files:
|
60
|
+
- .gitignore
|
61
|
+
- .rspec
|
62
|
+
- .yardopts
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE
|
65
|
+
- README.md
|
66
|
+
- Rakefile
|
67
|
+
- ext/.gitignore
|
68
|
+
- ext/Makefile
|
69
|
+
- ext/extconf.rb
|
70
|
+
- ext/sunscraper.cpp
|
71
|
+
- ext/sunscraper.h
|
72
|
+
- ext/sunscraper.pro
|
73
|
+
- ext/sunscraperexternal.cpp
|
74
|
+
- ext/sunscraperlibrary.cpp
|
75
|
+
- ext/sunscraperlibrary.h
|
76
|
+
- ext/sunscraperproxy.cpp
|
77
|
+
- ext/sunscraperproxy.h
|
78
|
+
- ext/sunscraperthread.cpp
|
79
|
+
- ext/sunscraperthread.h
|
80
|
+
- lib/sunscraper.rb
|
81
|
+
- lib/sunscraper/library.rb
|
82
|
+
- spec/spec_helper.rb
|
83
|
+
- spec/sunscraper_spec.rb
|
84
|
+
- sunscraper.gemspec
|
85
|
+
homepage: http://github.com/roundlake/sunscraper
|
86
|
+
licenses: []
|
87
|
+
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
|
91
|
+
require_paths:
|
92
|
+
- lib
|
93
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
hash: 881230260
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
version: "0"
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
hash: 881230260
|
108
|
+
segments:
|
109
|
+
- 0
|
110
|
+
version: "0"
|
111
|
+
requirements: []
|
112
|
+
|
113
|
+
rubyforge_project: sunscraper
|
114
|
+
rubygems_version: 1.8.12
|
115
|
+
signing_key:
|
116
|
+
specification_version: 3
|
117
|
+
summary: A WebKit-based, JavaScript-capable HTML scraper.
|
118
|
+
test_files: []
|
119
|
+
|
120
|
+
has_rdoc:
|