sunscraper 1.1.0.beta2 → 1.1.0.beta3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/embed/{sunscraper.pro → embed.pro} +4 -2
- data/ext/embed/sunscraper.cpp +2 -2
- data/ext/embed/sunscraperexternal.cpp +2 -2
- data/ext/embed/sunscraperlibrary.cpp +2 -2
- data/ext/embed/sunscraperlibrary.h +1 -1
- data/ext/embed/sunscraperwebpage.cpp +12 -0
- data/ext/embed/sunscraperwebpage.h +20 -0
- data/ext/embed/{sunscraperthread.cpp → sunscraperworker.cpp} +37 -22
- data/ext/embed/{sunscraperthread.h → sunscraperworker.h} +13 -9
- data/ext/extconf.rb +24 -15
- data/ext/standalone/{sunscraper.pro → standalone.pro} +0 -0
- data/ext/standalone/sunscraperproxy.cpp +1 -0
- data/ext/standalone/sunscraperworker.cpp +2 -0
- data/ext/sunscraper-ext.pro +2 -0
- data/sunscraper.gemspec +1 -1
- metadata +13 -11
@@ -4,13 +4,15 @@ TARGET = sunscraper
|
|
4
4
|
TEMPLATE = lib
|
5
5
|
|
6
6
|
SOURCES += sunscraperlibrary.cpp \
|
7
|
-
|
7
|
+
sunscraperworker.cpp \
|
8
|
+
sunscraperwebpage.cpp \
|
8
9
|
sunscraperexternal.cpp \
|
9
10
|
sunscraper.cpp \
|
10
11
|
sunscraperproxy.cpp
|
11
12
|
|
12
13
|
HEADERS += sunscraperlibrary.h \
|
13
|
-
|
14
|
+
sunscraperworker.h \
|
15
|
+
sunscraperwebpage.h \
|
14
16
|
sunscraper.h \
|
15
17
|
sunscraperproxy.h
|
16
18
|
|
data/ext/embed/sunscraper.cpp
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
#include <QtDebug>
|
9
9
|
#include "sunscraper.h"
|
10
10
|
#include "sunscraperlibrary.h"
|
11
|
-
#include "sunscraperthread.h"
|
11
|
+
#include "sunscraperworker.h"
|
12
12
|
|
13
13
|
unsigned Sunscraper::m_nextQueryId = 1;
|
14
14
|
QMutex Sunscraper::m_staticMutex;
|
@@ -19,7 +19,7 @@ Sunscraper::Sunscraper()
|
|
19
19
|
|
20
20
|
m_queryId = m_nextQueryId++;
|
21
21
|
|
22
|
-
|
22
|
+
SunscraperWorker *worker = SunscraperWorker::instance();
|
23
23
|
if(worker == NULL)
|
24
24
|
qFatal("Attempt to run Sunscraper before thread initialization");
|
25
25
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#include "sunscraper.h"
|
2
|
-
#include "sunscraperthread.h"
|
2
|
+
#include "sunscraperworker.h"
|
3
3
|
|
4
4
|
extern "C" {
|
5
5
|
Sunscraper *sunscraper_create()
|
@@ -34,6 +34,6 @@ extern "C" {
|
|
34
34
|
|
35
35
|
void sunscraper_finalize()
|
36
36
|
{
|
37
|
-
|
37
|
+
SunscraperWorker::commitSuicide();
|
38
38
|
}
|
39
39
|
}
|
@@ -1,12 +1,12 @@
|
|
1
1
|
#include "sunscraperlibrary.h"
|
2
|
-
#include "sunscraperthread.h"
|
2
|
+
#include "sunscraperworker.h"
|
3
3
|
#include <QtDebug>
|
4
4
|
|
5
5
|
SunscraperLibrary SunscraperLibrary::m_instance;
|
6
6
|
|
7
7
|
SunscraperLibrary::SunscraperLibrary()
|
8
8
|
{
|
9
|
-
|
9
|
+
SunscraperWorker::invoke();
|
10
10
|
}
|
11
11
|
|
12
12
|
SunscraperLibrary::~SunscraperLibrary()
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#include "sunscraperwebpage.h"
|
2
|
+
|
3
|
+
SunscraperWebPage::SunscraperWebPage(QObject *parent) :
|
4
|
+
QWebPage(parent)
|
5
|
+
{
|
6
|
+
}
|
7
|
+
|
8
|
+
void SunscraperWebPage::javaScriptConsoleMessage(const QString &message,
|
9
|
+
int lineNumber, const QString &sourceID)
|
10
|
+
{
|
11
|
+
emit consoleMessage(QString("%1:%2> %3").arg(sourceID).arg(lineNumber).arg(message));
|
12
|
+
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#ifndef SUNSCRAPERWEBPAGE_H
|
2
|
+
#define SUNSCRAPERWEBPAGE_H
|
3
|
+
|
4
|
+
#include <QWebPage>
|
5
|
+
|
6
|
+
class SunscraperWebPage : public QWebPage
|
7
|
+
{
|
8
|
+
Q_OBJECT
|
9
|
+
|
10
|
+
public:
|
11
|
+
SunscraperWebPage(QObject *parent = 0);
|
12
|
+
|
13
|
+
signals:
|
14
|
+
void consoleMessage(QString message);
|
15
|
+
|
16
|
+
protected:
|
17
|
+
virtual void javaScriptConsoleMessage(const QString & message, int lineNumber, const QString & sourceID);
|
18
|
+
};
|
19
|
+
|
20
|
+
#endif /* SUNSCRAPERWEBPAGE_H */
|
@@ -2,23 +2,25 @@
|
|
2
2
|
#include <QWebPage>
|
3
3
|
#include <QWebFrame>
|
4
4
|
#include <QTimer>
|
5
|
-
#include
|
5
|
+
#include <QWebView>
|
6
|
+
#include "sunscraperworker.h"
|
7
|
+
#include "sunscraperwebpage.h"
|
6
8
|
#include "sunscraperproxy.h"
|
7
9
|
#include <QtDebug>
|
8
10
|
#include <time.h>
|
9
11
|
|
10
12
|
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
11
|
-
pthread_t SunscraperThread::m_thread;
|
13
|
+
pthread_t SunscraperWorker::m_thread;
|
12
14
|
#endif
|
13
15
|
|
14
|
-
|
15
|
-
QSemaphore SunscraperThread::m_initializationLock;
|
16
|
+
SunscraperWorker *SunscraperWorker::m_instance;
|
17
|
+
QSemaphore SunscraperWorker::m_initializationLock;
|
16
18
|
|
17
|
-
|
19
|
+
SunscraperWorker::SunscraperWorker()
|
18
20
|
{
|
19
21
|
}
|
20
22
|
|
21
|
-
|
23
|
+
SunscraperWorker *SunscraperWorker::instance()
|
22
24
|
{
|
23
25
|
m_initializationLock.acquire(1);
|
24
26
|
m_initializationLock.release(1);
|
@@ -26,14 +28,14 @@ SunscraperThread *SunscraperThread::instance()
|
|
26
28
|
return m_instance;
|
27
29
|
}
|
28
30
|
|
29
|
-
void SunscraperThread::invoke()
|
31
|
+
void SunscraperWorker::invoke()
|
30
32
|
{
|
31
33
|
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
32
|
-
pthread_create(&m_thread, NULL, &SunscraperThread::thread_routine, NULL);
|
34
|
+
pthread_create(&m_thread, NULL, &SunscraperWorker::thread_routine, NULL);
|
33
35
|
#endif
|
34
36
|
}
|
35
37
|
|
36
|
-
void *SunscraperThread::thread_routine(void *)
|
38
|
+
void *SunscraperWorker::thread_routine(void *)
|
37
39
|
{
|
38
40
|
/* Better error messages. */
|
39
41
|
int argc = 1;
|
@@ -49,9 +51,9 @@ void *SunscraperThread::thread_routine(void *)
|
|
49
51
|
QApplication app(argc, argv);
|
50
52
|
|
51
53
|
if(m_instance != NULL)
|
52
|
-
qFatal("Attempt to invoke SunscraperThread more than once");
|
54
|
+
qFatal("Attempt to invoke SunscraperWorker more than once");
|
53
55
|
|
54
|
-
m_instance = new SunscraperThread();
|
56
|
+
m_instance = new SunscraperWorker();
|
55
57
|
m_initializationLock.release(1);
|
56
58
|
|
57
59
|
/* The magic value 42 means we want exit from the loop. */
|
@@ -62,7 +64,7 @@ void *SunscraperThread::thread_routine(void *)
|
|
62
64
|
return NULL;
|
63
65
|
}
|
64
66
|
|
65
|
-
void SunscraperThread::commitSuicide()
|
67
|
+
void SunscraperWorker::commitSuicide()
|
66
68
|
{
|
67
69
|
QApplication::exit(42);
|
68
70
|
|
@@ -71,19 +73,19 @@ void SunscraperThread::commitSuicide()
|
|
71
73
|
#endif
|
72
74
|
}
|
73
75
|
|
74
|
-
void SunscraperThread::loadHtml(unsigned queryId, QString html)
|
76
|
+
void SunscraperWorker::loadHtml(unsigned queryId, QString html)
|
75
77
|
{
|
76
78
|
QWebPage *webPage = initializeWebPage(queryId);
|
77
79
|
webPage->mainFrame()->setHtml(html);
|
78
80
|
}
|
79
81
|
|
80
|
-
void SunscraperThread::loadUrl(unsigned queryId, QString url)
|
82
|
+
void SunscraperWorker::loadUrl(unsigned queryId, QString url)
|
81
83
|
{
|
82
84
|
QWebPage *webPage = initializeWebPage(queryId);
|
83
85
|
webPage->mainFrame()->load(url);
|
84
86
|
}
|
85
87
|
|
86
|
-
void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
|
88
|
+
void SunscraperWorker::setTimeout(unsigned queryId, unsigned timeout)
|
87
89
|
{
|
88
90
|
Q_ASSERT(m_timers[queryId] == NULL);
|
89
91
|
|
@@ -97,7 +99,7 @@ void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
|
|
97
99
|
m_timers[queryId] = timer;
|
98
100
|
}
|
99
101
|
|
100
|
-
void SunscraperThread::finalize(unsigned queryId)
|
102
|
+
void SunscraperWorker::finalize(unsigned queryId)
|
101
103
|
{
|
102
104
|
Q_ASSERT(m_webPages[queryId] != NULL);
|
103
105
|
|
@@ -110,20 +112,28 @@ void SunscraperThread::finalize(unsigned queryId)
|
|
110
112
|
}
|
111
113
|
}
|
112
114
|
|
113
|
-
QWebPage *SunscraperThread::initializeWebPage(unsigned queryId)
|
115
|
+
QWebPage *SunscraperWorker::initializeWebPage(unsigned queryId)
|
114
116
|
{
|
115
117
|
Q_ASSERT(m_webPages[queryId] == NULL);
|
116
118
|
|
117
|
-
|
118
|
-
|
119
|
-
|
119
|
+
SunscraperWebPage *webPage = new SunscraperWebPage(this);
|
120
|
+
webPage->settings()->setAttribute(QWebSettings::LocalStorageEnabled, true);
|
121
|
+
|
122
|
+
connect(webPage, SIGNAL(frameCreated(QWebFrame*)), this, SLOT(attachFrame(QWebFrame*)));
|
123
|
+
connect(webPage, SIGNAL(consoleMessage(QString)), this, SLOT(routeMessage(QString)));
|
120
124
|
|
121
125
|
m_webPages[queryId] = webPage;
|
122
126
|
|
123
127
|
return webPage;
|
124
128
|
}
|
125
129
|
|
126
|
-
void SunscraperThread::attachAPI()
|
130
|
+
void SunscraperWorker::attachFrame(QWebFrame *frame)
|
131
|
+
{
|
132
|
+
connect(frame, SIGNAL(javaScriptWindowObjectCleared()),
|
133
|
+
this, SLOT(attachAPI()));
|
134
|
+
}
|
135
|
+
|
136
|
+
void SunscraperWorker::attachAPI()
|
127
137
|
{
|
128
138
|
QWebFrame *origin = static_cast<QWebFrame *>(QObject::sender());
|
129
139
|
QWebPage *page = origin->page();
|
@@ -137,7 +147,7 @@ void SunscraperThread::attachAPI()
|
|
137
147
|
origin->addToJavaScriptWindowObject("Sunscraper", proxy, QScriptEngine::QtOwnership);
|
138
148
|
}
|
139
149
|
|
140
|
-
void SunscraperThread::routeTimeout()
|
150
|
+
void SunscraperWorker::routeTimeout()
|
141
151
|
{
|
142
152
|
QTimer *origin = static_cast<QTimer *>(QObject::sender());
|
143
153
|
|
@@ -146,3 +156,8 @@ void SunscraperThread::routeTimeout()
|
|
146
156
|
|
147
157
|
emit timeout(queryId);
|
148
158
|
}
|
159
|
+
|
160
|
+
void SunscraperWorker::routeMessage(QString message)
|
161
|
+
{
|
162
|
+
qDebug() << "Sunscraper Console:" << message;
|
163
|
+
}
|
@@ -1,20 +1,22 @@
|
|
1
|
-
#ifndef
|
2
|
-
#define
|
1
|
+
#ifndef SUNSCRAPERWORKER_H
|
2
|
+
#define SUNSCRAPERWORKER_H
|
3
3
|
|
4
4
|
#include <QObject>
|
5
5
|
#include <QSemaphore>
|
6
6
|
#include <QMap>
|
7
|
+
#include <QUrl>
|
7
8
|
|
8
9
|
class QWebPage;
|
10
|
+
class QWebFrame;
|
9
11
|
class QTimer;
|
10
12
|
|
11
|
-
class SunscraperThread : public QObject
|
13
|
+
class SunscraperWorker : public QObject
|
12
14
|
{
|
13
15
|
Q_OBJECT
|
14
16
|
public:
|
15
17
|
static void invoke();
|
16
18
|
static void commitSuicide();
|
17
|
-
static SunscraperThread *instance();
|
19
|
+
static SunscraperWorker *instance();
|
18
20
|
|
19
21
|
signals:
|
20
22
|
void finished(unsigned queryId, QString result);
|
@@ -27,17 +29,19 @@ public slots:
|
|
27
29
|
void finalize(unsigned queryId);
|
28
30
|
|
29
31
|
private slots:
|
32
|
+
void attachFrame(QWebFrame *frame);
|
30
33
|
void attachAPI();
|
31
34
|
void routeTimeout();
|
35
|
+
void routeMessage(QString message);
|
32
36
|
|
33
37
|
private:
|
34
|
-
static SunscraperThread *m_instance;
|
38
|
+
static SunscraperWorker *m_instance;
|
35
39
|
static QSemaphore m_initializationLock;
|
36
40
|
|
37
41
|
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
38
42
|
static pthread_t m_thread;
|
39
43
|
#else
|
40
|
-
#error Your platform is unsupported. Implement SunscraperThread::invoke() and send a pull request.
|
44
|
+
#error Your platform is unsupported. Implement SunscraperWorker::invoke() and send a pull request.
|
41
45
|
#endif
|
42
46
|
|
43
47
|
static void *thread_routine(void *arg);
|
@@ -45,10 +49,10 @@ private:
|
|
45
49
|
QMap<unsigned, QWebPage *> m_webPages;
|
46
50
|
QMap<unsigned, QTimer *> m_timers;
|
47
51
|
|
48
|
-
|
49
|
-
|
52
|
+
SunscraperWorker();
|
53
|
+
SunscraperWorker(SunscraperWorker &);
|
50
54
|
|
51
55
|
QWebPage *initializeWebPage(unsigned queryId);
|
52
56
|
};
|
53
57
|
|
54
|
-
#endif //
|
58
|
+
#endif // SUNSCRAPERWORKER_H
|
data/ext/extconf.rb
CHANGED
@@ -1,22 +1,31 @@
|
|
1
1
|
# This Makefile will get replaced by qmake.
|
2
2
|
|
3
|
-
if
|
4
|
-
|
5
|
-
|
3
|
+
if RUBY_PLATFORM =~ /darwin/i || RbConfig::CONFIG['target_os'] == 'darwin'
|
4
|
+
# Cannot you OS X have a build system like all sane people?
|
5
|
+
# Win32 wins again.
|
6
6
|
qmake = %{qmake -spec macx-g++}
|
7
|
-
else
|
8
|
-
qmake = %{qmake}
|
9
|
-
end
|
10
7
|
|
11
|
-
File.open("Makefile", "w") do |mf|
|
12
|
-
|
8
|
+
File.open("Makefile", "w") do |mf|
|
9
|
+
mf.puts <<-ENDM
|
13
10
|
all:
|
14
|
-
cd embed
|
15
|
-
cd standalone
|
16
|
-
make -C embed
|
17
|
-
make -C standalone
|
18
|
-
|
11
|
+
(cd embed && #{qmake}; make)
|
12
|
+
(cd standalone && #{qmake}; make)
|
19
13
|
install:
|
20
14
|
# do nothing
|
21
|
-
|
22
|
-
end
|
15
|
+
ENDM
|
16
|
+
end
|
17
|
+
else
|
18
|
+
if Gem.win_platform?
|
19
|
+
qmake = %{qmake -spec win32-g++}
|
20
|
+
else
|
21
|
+
qmake = %{qmake}
|
22
|
+
end
|
23
|
+
|
24
|
+
File.open("Makefile", "w") do |mf|
|
25
|
+
mf.puts <<-ENDM
|
26
|
+
all:
|
27
|
+
#{qmake}
|
28
|
+
make
|
29
|
+
ENDM
|
30
|
+
end
|
31
|
+
end
|
File without changes
|
@@ -35,6 +35,8 @@ QWebPage *SunscraperWorker::initializeWebPage(unsigned queryId)
|
|
35
35
|
Q_ASSERT(_webPages[queryId] == NULL);
|
36
36
|
|
37
37
|
QWebPage *webPage = new QWebPage(this);
|
38
|
+
webPage->settings()->setAttribute(QWebSettings::LocalStorageEnabled, true);
|
39
|
+
|
38
40
|
connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
|
39
41
|
this, SLOT(attachAPI()));
|
40
42
|
|
data/sunscraper.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "sunscraper"
|
6
|
-
s.version = "1.1.0.beta2"
|
6
|
+
s.version = "1.1.0.beta3"
|
7
7
|
s.authors = ["Peter Zotov"]
|
8
8
|
s.email = ["whitequark@whitequark.org"]
|
9
9
|
s.homepage = "http://github.com/whitequark/sunscraper"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sunscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0.beta2
|
4
|
+
version: 1.1.0.beta3
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &81880600 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *81880600
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: ffi
|
27
|
-
requirement: &
|
27
|
+
requirement: &81880200 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 1.0.11
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *81880200
|
36
36
|
description: A WebKit-based, JavaScript-capable HTML scraper.
|
37
37
|
email:
|
38
38
|
- whitequark@whitequark.org
|
@@ -50,18 +50,20 @@ files:
|
|
50
50
|
- README.md
|
51
51
|
- Rakefile
|
52
52
|
- ext/.gitignore
|
53
|
+
- ext/embed/embed.pro
|
53
54
|
- ext/embed/sunscraper.cpp
|
54
55
|
- ext/embed/sunscraper.h
|
55
|
-
- ext/embed/sunscraper.pro
|
56
56
|
- ext/embed/sunscraperexternal.cpp
|
57
57
|
- ext/embed/sunscraperlibrary.cpp
|
58
58
|
- ext/embed/sunscraperlibrary.h
|
59
59
|
- ext/embed/sunscraperproxy.cpp
|
60
60
|
- ext/embed/sunscraperproxy.h
|
61
|
-
- ext/embed/sunscraperthread.cpp
|
62
|
-
- ext/embed/sunscraperthread.h
|
61
|
+
- ext/embed/sunscraperwebpage.cpp
|
62
|
+
- ext/embed/sunscraperwebpage.h
|
63
|
+
- ext/embed/sunscraperworker.cpp
|
64
|
+
- ext/embed/sunscraperworker.h
|
63
65
|
- ext/extconf.rb
|
64
|
-
- ext/standalone/sunscraper.pro
|
66
|
+
- ext/standalone/standalone.pro
|
65
67
|
- ext/standalone/sunscrapermain.cpp
|
66
68
|
- ext/standalone/sunscraperproxy.cpp
|
67
69
|
- ext/standalone/sunscraperproxy.h
|
@@ -69,6 +71,7 @@ files:
|
|
69
71
|
- ext/standalone/sunscraperrpc.h
|
70
72
|
- ext/standalone/sunscraperworker.cpp
|
71
73
|
- ext/standalone/sunscraperworker.h
|
74
|
+
- ext/sunscraper-ext.pro
|
72
75
|
- lib/sunscraper.rb
|
73
76
|
- lib/sunscraper/library.rb
|
74
77
|
- lib/sunscraper/standalone.rb
|
@@ -100,4 +103,3 @@ signing_key:
|
|
100
103
|
specification_version: 3
|
101
104
|
summary: A WebKit-based, JavaScript-capable HTML scraper.
|
102
105
|
test_files: []
|
103
|
-
has_rdoc:
|