sunscraper 1.2.0.beta1 → 1.2.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/.gitignore +2 -0
- data/ext/common/sunscraperworker.cpp +4 -1
- data/ext/embed/sunscraperinterface.cpp +2 -3
- data/ext/embed/sunscraperthread.cpp +5 -0
- data/ext/extconf.rb +7 -23
- data/ext/standalone/sunscraperrpc.cpp +23 -1
- data/spec/sunscraper_spec.rb +11 -9
- data/sunscraper.gemspec +1 -1
- metadata +6 -7
- data/ext/common/libsunscraper_common.a +0 -0
data/ext/.gitignore
CHANGED
@@ -86,7 +86,10 @@ void SunscraperWorker::onJavascriptObjectCleared()
|
|
86
86
|
QWebPage *page = origin->page();
|
87
87
|
|
88
88
|
unsigned queryId = m_webPages.key(page, 0);
|
89
|
-
|
89
|
+
|
90
|
+
/* Called on an already finalized page in the process of finalization. */
|
91
|
+
if(queryId == 0)
|
92
|
+
return;
|
90
93
|
|
91
94
|
SunscraperProxy *proxy = new SunscraperProxy(page, queryId);
|
92
95
|
connect(proxy, SIGNAL(finished(uint)), this, SLOT(onFinish(uint)));
|
@@ -82,9 +82,8 @@ void SunscraperInterface::signalSemaphore(unsigned queryId)
|
|
82
82
|
{
|
83
83
|
QMutexLocker locker(&m_semaphoresMutex);
|
84
84
|
|
85
|
-
|
86
|
-
|
87
|
-
m_semaphores[queryId]->release(1);
|
85
|
+
if(m_semaphores.contains(queryId))
|
86
|
+
m_semaphores[queryId]->release(1);
|
88
87
|
}
|
89
88
|
|
90
89
|
unsigned SunscraperInterface::createQuery()
|
@@ -1,4 +1,5 @@
|
|
1
1
|
#include <QApplication>
|
2
|
+
#include <qnamespace.h>
|
2
3
|
#include <QtDebug>
|
3
4
|
#include "sunscraperthread.h"
|
4
5
|
#include "sunscraperworker.h"
|
@@ -7,6 +8,8 @@
|
|
7
8
|
pthread_t SunscraperThread::m_thread;
|
8
9
|
#endif
|
9
10
|
|
11
|
+
extern void qt_set_current_thread_to_main_thread();
|
12
|
+
|
10
13
|
void SunscraperThread::invoke()
|
11
14
|
{
|
12
15
|
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
@@ -23,6 +26,8 @@ void *SunscraperThread::thread_routine(void *)
|
|
23
26
|
/* Why (char*)? Because argv can (theoretically) be modified. *
|
24
27
|
* But Qt won't do that with argv[0]. I know, trust me. */
|
25
28
|
|
29
|
+
QInternal::callFunction(QInternal::SetCurrentThreadToMainThread, NULL);
|
30
|
+
|
26
31
|
QApplication app(argc, argv);
|
27
32
|
app.setApplicationName("Sunscraper-Embed");
|
28
33
|
|
data/ext/extconf.rb
CHANGED
@@ -2,32 +2,16 @@
|
|
2
2
|
|
3
3
|
require 'rbconfig'
|
4
4
|
|
5
|
-
if
|
6
|
-
|
7
|
-
# Win32 wins again.
|
8
|
-
qmake = %{qmake CONFIG+=debug -spec macx-g++}
|
9
|
-
|
10
|
-
File.open("Makefile", "w") do |mf|
|
11
|
-
mf.puts <<-ENDM
|
12
|
-
all:
|
13
|
-
(cd embed && #{qmake}; make)
|
14
|
-
(cd standalone && #{qmake}; make)
|
15
|
-
install:
|
16
|
-
# do nothing
|
17
|
-
ENDM
|
18
|
-
end
|
5
|
+
if Gem.win_platform?
|
6
|
+
qmake = %{qmake CONFIG+=debug -spec win32-g++}
|
19
7
|
else
|
20
|
-
|
21
|
-
|
22
|
-
else
|
23
|
-
qmake = %{qmake CONFIG+=debug}
|
24
|
-
end
|
8
|
+
qmake = %{qmake CONFIG+=debug}
|
9
|
+
end
|
25
10
|
|
26
|
-
|
27
|
-
|
11
|
+
File.open("Makefile", "w") do |mf|
|
12
|
+
mf.puts <<-ENDM
|
28
13
|
all:
|
29
14
|
#{qmake}
|
30
15
|
make
|
31
|
-
|
32
|
-
end
|
16
|
+
ENDM
|
33
17
|
end
|
@@ -7,11 +7,13 @@
|
|
7
7
|
#include <sunscraperworker.h>
|
8
8
|
#include "sunscraperrpc.h"
|
9
9
|
|
10
|
+
//#define DEBUG_SUNSCRAPERRPC
|
11
|
+
|
10
12
|
SunscraperWorker *SunscraperRPC::m_worker;
|
11
13
|
unsigned SunscraperRPC::m_nextQueryId;
|
12
14
|
|
13
15
|
SunscraperRPC::SunscraperRPC(QLocalSocket *socket) :
|
14
|
-
m_socket(socket), m_state(StateHeader)
|
16
|
+
m_socket(socket), m_state(StateHeader), m_result(false)
|
15
17
|
{
|
16
18
|
m_nextQueryId += 1;
|
17
19
|
m_queryId = m_nextQueryId;
|
@@ -80,6 +82,10 @@ void SunscraperRPC::onInputDisconnected()
|
|
80
82
|
|
81
83
|
void SunscraperRPC::processRequest(unsigned requestType, QByteArray data)
|
82
84
|
{
|
85
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
86
|
+
qDebug() << QString("request(%1)").arg(m_queryId) << requestType << data;
|
87
|
+
#endif
|
88
|
+
|
83
89
|
switch(requestType) {
|
84
90
|
case RPC_LOAD_HTML: {
|
85
91
|
QDataStream stream(data);
|
@@ -127,6 +133,10 @@ void SunscraperRPC::onFinish(unsigned eventQueryId)
|
|
127
133
|
if(eventQueryId != m_queryId)
|
128
134
|
return;
|
129
135
|
|
136
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
137
|
+
qDebug() << QString("finish(%1)").arg(m_queryId);
|
138
|
+
#endif
|
139
|
+
|
130
140
|
QByteArray data;
|
131
141
|
|
132
142
|
QDataStream stream(&data, QIODevice::WriteOnly);
|
@@ -142,6 +152,10 @@ void SunscraperRPC::onTimeout(unsigned eventQueryId)
|
|
142
152
|
if(eventQueryId != m_queryId)
|
143
153
|
return;
|
144
154
|
|
155
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
156
|
+
qDebug() << QString("timeout(%1)").arg(m_queryId);
|
157
|
+
#endif
|
158
|
+
|
145
159
|
QByteArray data;
|
146
160
|
|
147
161
|
QDataStream stream(&data, QIODevice::WriteOnly);
|
@@ -157,6 +171,10 @@ void SunscraperRPC::onFetchDone(unsigned eventQueryId, QString data)
|
|
157
171
|
if(eventQueryId != m_queryId)
|
158
172
|
return;
|
159
173
|
|
174
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
175
|
+
qDebug() << QString("fetchDone(%1)").arg(m_queryId);
|
176
|
+
#endif
|
177
|
+
|
160
178
|
sendReply(data.toLocal8Bit());
|
161
179
|
}
|
162
180
|
|
@@ -164,6 +182,10 @@ void SunscraperRPC::sendReply(QByteArray data)
|
|
164
182
|
{
|
165
183
|
QByteArray packet;
|
166
184
|
|
185
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
186
|
+
qDebug() << QString("reply(%1)").arg(m_queryId) << data;
|
187
|
+
#endif
|
188
|
+
|
167
189
|
QDataStream stream(&packet, QIODevice::WriteOnly);
|
168
190
|
stream << data;
|
169
191
|
|
data/spec/sunscraper_spec.rb
CHANGED
@@ -105,16 +105,18 @@ define_tests = lambda do |klass, worker|
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
108
|
+
if worker == :embed
|
109
|
+
it "should withstand a lot of concurrent threads" do
|
110
|
+
100.times.map {
|
111
|
+
Thread.new {
|
112
|
+
Sunscraper.scrape_html(HTML_FUGA)
|
113
|
+
}
|
114
|
+
}.each(&:join).
|
115
|
+
map(&:value).
|
116
|
+
each { |result|
|
117
|
+
result.should include('It works!')
|
112
118
|
}
|
113
|
-
|
114
|
-
map(&:value).
|
115
|
-
each { |result|
|
116
|
-
result.should include('It works!')
|
117
|
-
}
|
119
|
+
end
|
118
120
|
end
|
119
121
|
end
|
120
122
|
end
|
data/sunscraper.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "sunscraper"
|
6
|
-
s.version = "1.2.0.beta1"
|
6
|
+
s.version = "1.2.0.pre1"
|
7
7
|
s.authors = ["Peter Zotov"]
|
8
8
|
s.email = ["whitequark@whitequark.org"]
|
9
9
|
s.homepage = "http://github.com/whitequark/sunscraper"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sunscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0.beta1
|
4
|
+
version: 1.2.0.pre1
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &85313840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *85313840
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: ffi
|
27
|
-
requirement: &
|
27
|
+
requirement: &85313520 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 1.0.11
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *85313520
|
36
36
|
description: A WebKit-based, JavaScript-capable HTML scraper.
|
37
37
|
email:
|
38
38
|
- whitequark@whitequark.org
|
@@ -51,7 +51,6 @@ files:
|
|
51
51
|
- Rakefile
|
52
52
|
- ext/.gitignore
|
53
53
|
- ext/common/common.pro
|
54
|
-
- ext/common/libsunscraper_common.a
|
55
54
|
- ext/common/sunscraperproxy.cpp
|
56
55
|
- ext/common/sunscraperproxy.h
|
57
56
|
- ext/common/sunscraperwebpage.cpp
|
Binary file
|