sunscraper 1.2.0.beta1 → 1.2.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/.gitignore +2 -0
- data/ext/common/sunscraperworker.cpp +4 -1
- data/ext/embed/sunscraperinterface.cpp +2 -3
- data/ext/embed/sunscraperthread.cpp +5 -0
- data/ext/extconf.rb +7 -23
- data/ext/standalone/sunscraperrpc.cpp +23 -1
- data/spec/sunscraper_spec.rb +11 -9
- data/sunscraper.gemspec +1 -1
- metadata +6 -7
- data/ext/common/libsunscraper_common.a +0 -0
data/ext/.gitignore
CHANGED
@@ -86,7 +86,10 @@ void SunscraperWorker::onJavascriptObjectCleared()
|
|
86
86
|
QWebPage *page = origin->page();
|
87
87
|
|
88
88
|
unsigned queryId = m_webPages.key(page, 0);
|
89
|
-
|
89
|
+
|
90
|
+
/* Called on an already finalized page in the process of finalization. */
|
91
|
+
if(queryId == 0)
|
92
|
+
return;
|
90
93
|
|
91
94
|
SunscraperProxy *proxy = new SunscraperProxy(page, queryId);
|
92
95
|
connect(proxy, SIGNAL(finished(uint)), this, SLOT(onFinish(uint)));
|
@@ -82,9 +82,8 @@ void SunscraperInterface::signalSemaphore(unsigned queryId)
|
|
82
82
|
{
|
83
83
|
QMutexLocker locker(&m_semaphoresMutex);
|
84
84
|
|
85
|
-
|
86
|
-
|
87
|
-
m_semaphores[queryId]->release(1);
|
85
|
+
if(m_semaphores.contains(queryId))
|
86
|
+
m_semaphores[queryId]->release(1);
|
88
87
|
}
|
89
88
|
|
90
89
|
unsigned SunscraperInterface::createQuery()
|
@@ -1,4 +1,5 @@
|
|
1
1
|
#include <QApplication>
|
2
|
+
#include <qnamespace.h>
|
2
3
|
#include <QtDebug>
|
3
4
|
#include "sunscraperthread.h"
|
4
5
|
#include "sunscraperworker.h"
|
@@ -7,6 +8,8 @@
|
|
7
8
|
pthread_t SunscraperThread::m_thread;
|
8
9
|
#endif
|
9
10
|
|
11
|
+
extern void qt_set_current_thread_to_main_thread();
|
12
|
+
|
10
13
|
void SunscraperThread::invoke()
|
11
14
|
{
|
12
15
|
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
@@ -23,6 +26,8 @@ void *SunscraperThread::thread_routine(void *)
|
|
23
26
|
/* Why (char*)? Because argv can (theoretically) be modified. *
|
24
27
|
* But Qt won't do that with argv[0]. I know, trust me. */
|
25
28
|
|
29
|
+
QInternal::callFunction(QInternal::SetCurrentThreadToMainThread, NULL);
|
30
|
+
|
26
31
|
QApplication app(argc, argv);
|
27
32
|
app.setApplicationName("Sunscraper-Embed");
|
28
33
|
|
data/ext/extconf.rb
CHANGED
@@ -2,32 +2,16 @@
|
|
2
2
|
|
3
3
|
require 'rbconfig'
|
4
4
|
|
5
|
-
if
|
6
|
-
|
7
|
-
# Win32 wins again.
|
8
|
-
qmake = %{qmake CONFIG+=debug -spec macx-g++}
|
9
|
-
|
10
|
-
File.open("Makefile", "w") do |mf|
|
11
|
-
mf.puts <<-ENDM
|
12
|
-
all:
|
13
|
-
(cd embed && #{qmake}; make)
|
14
|
-
(cd standalone && #{qmake}; make)
|
15
|
-
install:
|
16
|
-
# do nothing
|
17
|
-
ENDM
|
18
|
-
end
|
5
|
+
if Gem.win_platform?
|
6
|
+
qmake = %{qmake CONFIG+=debug -spec win32-g++}
|
19
7
|
else
|
20
|
-
|
21
|
-
|
22
|
-
else
|
23
|
-
qmake = %{qmake CONFIG+=debug}
|
24
|
-
end
|
8
|
+
qmake = %{qmake CONFIG+=debug}
|
9
|
+
end
|
25
10
|
|
26
|
-
|
27
|
-
|
11
|
+
File.open("Makefile", "w") do |mf|
|
12
|
+
mf.puts <<-ENDM
|
28
13
|
all:
|
29
14
|
#{qmake}
|
30
15
|
make
|
31
|
-
|
32
|
-
end
|
16
|
+
ENDM
|
33
17
|
end
|
@@ -7,11 +7,13 @@
|
|
7
7
|
#include <sunscraperworker.h>
|
8
8
|
#include "sunscraperrpc.h"
|
9
9
|
|
10
|
+
//#define DEBUG_SUNSCRAPERRPC
|
11
|
+
|
10
12
|
SunscraperWorker *SunscraperRPC::m_worker;
|
11
13
|
unsigned SunscraperRPC::m_nextQueryId;
|
12
14
|
|
13
15
|
SunscraperRPC::SunscraperRPC(QLocalSocket *socket) :
|
14
|
-
m_socket(socket), m_state(StateHeader)
|
16
|
+
m_socket(socket), m_state(StateHeader), m_result(false)
|
15
17
|
{
|
16
18
|
m_nextQueryId += 1;
|
17
19
|
m_queryId = m_nextQueryId;
|
@@ -80,6 +82,10 @@ void SunscraperRPC::onInputDisconnected()
|
|
80
82
|
|
81
83
|
void SunscraperRPC::processRequest(unsigned requestType, QByteArray data)
|
82
84
|
{
|
85
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
86
|
+
qDebug() << QString("request(%1)").arg(m_queryId) << requestType << data;
|
87
|
+
#endif
|
88
|
+
|
83
89
|
switch(requestType) {
|
84
90
|
case RPC_LOAD_HTML: {
|
85
91
|
QDataStream stream(data);
|
@@ -127,6 +133,10 @@ void SunscraperRPC::onFinish(unsigned eventQueryId)
|
|
127
133
|
if(eventQueryId != m_queryId)
|
128
134
|
return;
|
129
135
|
|
136
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
137
|
+
qDebug() << QString("finish(%1)").arg(m_queryId);
|
138
|
+
#endif
|
139
|
+
|
130
140
|
QByteArray data;
|
131
141
|
|
132
142
|
QDataStream stream(&data, QIODevice::WriteOnly);
|
@@ -142,6 +152,10 @@ void SunscraperRPC::onTimeout(unsigned eventQueryId)
|
|
142
152
|
if(eventQueryId != m_queryId)
|
143
153
|
return;
|
144
154
|
|
155
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
156
|
+
qDebug() << QString("timeout(%1)").arg(m_queryId);
|
157
|
+
#endif
|
158
|
+
|
145
159
|
QByteArray data;
|
146
160
|
|
147
161
|
QDataStream stream(&data, QIODevice::WriteOnly);
|
@@ -157,6 +171,10 @@ void SunscraperRPC::onFetchDone(unsigned eventQueryId, QString data)
|
|
157
171
|
if(eventQueryId != m_queryId)
|
158
172
|
return;
|
159
173
|
|
174
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
175
|
+
qDebug() << QString("fetchDone(%1)").arg(m_queryId);
|
176
|
+
#endif
|
177
|
+
|
160
178
|
sendReply(data.toLocal8Bit());
|
161
179
|
}
|
162
180
|
|
@@ -164,6 +182,10 @@ void SunscraperRPC::sendReply(QByteArray data)
|
|
164
182
|
{
|
165
183
|
QByteArray packet;
|
166
184
|
|
185
|
+
#ifdef DEBUG_SUNSCRAPERRPC
|
186
|
+
qDebug() << QString("reply(%1)").arg(m_queryId) << data;
|
187
|
+
#endif
|
188
|
+
|
167
189
|
QDataStream stream(&packet, QIODevice::WriteOnly);
|
168
190
|
stream << data;
|
169
191
|
|
data/spec/sunscraper_spec.rb
CHANGED
@@ -105,16 +105,18 @@ define_tests = lambda do |klass, worker|
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
108
|
+
if worker == :embed
|
109
|
+
it "should withstand a lot of concurrent threads" do
|
110
|
+
100.times.map {
|
111
|
+
Thread.new {
|
112
|
+
Sunscraper.scrape_html(HTML_FUGA)
|
113
|
+
}
|
114
|
+
}.each(&:join).
|
115
|
+
map(&:value).
|
116
|
+
each { |result|
|
117
|
+
result.should include('It works!')
|
112
118
|
}
|
113
|
-
|
114
|
-
map(&:value).
|
115
|
-
each { |result|
|
116
|
-
result.should include('It works!')
|
117
|
-
}
|
119
|
+
end
|
118
120
|
end
|
119
121
|
end
|
120
122
|
end
|
data/sunscraper.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "sunscraper"
|
6
|
-
s.version = "1.2.0.beta1"
|
6
|
+
s.version = "1.2.0.pre1"
|
7
7
|
s.authors = ["Peter Zotov"]
|
8
8
|
s.email = ["whitequark@whitequark.org"]
|
9
9
|
s.homepage = "http://github.com/whitequark/sunscraper"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sunscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0.beta1
|
4
|
+
version: 1.2.0.pre1
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &85313840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *85313840
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: ffi
|
27
|
-
requirement: &
|
27
|
+
requirement: &85313520 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 1.0.11
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *85313520
|
36
36
|
description: A WebKit-based, JavaScript-capable HTML scraper.
|
37
37
|
email:
|
38
38
|
- whitequark@whitequark.org
|
@@ -51,7 +51,6 @@ files:
|
|
51
51
|
- Rakefile
|
52
52
|
- ext/.gitignore
|
53
53
|
- ext/common/common.pro
|
54
|
-
- ext/common/libsunscraper_common.a
|
55
54
|
- ext/common/sunscraperproxy.cpp
|
56
55
|
- ext/common/sunscraperproxy.h
|
57
56
|
- ext/common/sunscraperwebpage.cpp
|
Binary file
|