sunscraper 1.1.0.beta2 → 1.1.0.beta3
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/embed/{sunscraper.pro → embed.pro} +4 -2
- data/ext/embed/sunscraper.cpp +2 -2
- data/ext/embed/sunscraperexternal.cpp +2 -2
- data/ext/embed/sunscraperlibrary.cpp +2 -2
- data/ext/embed/sunscraperlibrary.h +1 -1
- data/ext/embed/sunscraperwebpage.cpp +12 -0
- data/ext/embed/sunscraperwebpage.h +20 -0
- data/ext/embed/{sunscraperthread.cpp → sunscraperworker.cpp} +37 -22
- data/ext/embed/{sunscraperthread.h → sunscraperworker.h} +13 -9
- data/ext/extconf.rb +24 -15
- data/ext/standalone/{sunscraper.pro → standalone.pro} +0 -0
- data/ext/standalone/sunscraperproxy.cpp +1 -0
- data/ext/standalone/sunscraperworker.cpp +2 -0
- data/ext/sunscraper-ext.pro +2 -0
- data/sunscraper.gemspec +1 -1
- metadata +13 -11
@@ -4,13 +4,15 @@ TARGET = sunscraper
|
|
4
4
|
TEMPLATE = lib
|
5
5
|
|
6
6
|
SOURCES += sunscraperlibrary.cpp \
|
7
|
-
|
7
|
+
sunscraperworker.cpp \
|
8
|
+
sunscraperwebpage.cpp \
|
8
9
|
sunscraperexternal.cpp \
|
9
10
|
sunscraper.cpp \
|
10
11
|
sunscraperproxy.cpp
|
11
12
|
|
12
13
|
HEADERS += sunscraperlibrary.h \
|
13
|
-
|
14
|
+
sunscraperworker.h \
|
15
|
+
sunscraperwebpage.h \
|
14
16
|
sunscraper.h \
|
15
17
|
sunscraperproxy.h
|
16
18
|
|
data/ext/embed/sunscraper.cpp
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
#include <QtDebug>
|
9
9
|
#include "sunscraper.h"
|
10
10
|
#include "sunscraperlibrary.h"
|
11
|
-
#include "sunscraperthread.h"
|
11
|
+
#include "sunscraperworker.h"
|
12
12
|
|
13
13
|
unsigned Sunscraper::m_nextQueryId = 1;
|
14
14
|
QMutex Sunscraper::m_staticMutex;
|
@@ -19,7 +19,7 @@ Sunscraper::Sunscraper()
|
|
19
19
|
|
20
20
|
m_queryId = m_nextQueryId++;
|
21
21
|
|
22
|
-
|
22
|
+
SunscraperWorker *worker = SunscraperWorker::instance();
|
23
23
|
if(worker == NULL)
|
24
24
|
qFatal("Attempt to run Sunscraper before thread initialization");
|
25
25
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#include "sunscraper.h"
|
2
|
-
#include "sunscraperthread.h"
|
2
|
+
#include "sunscraperworker.h"
|
3
3
|
|
4
4
|
extern "C" {
|
5
5
|
Sunscraper *sunscraper_create()
|
@@ -34,6 +34,6 @@ extern "C" {
|
|
34
34
|
|
35
35
|
void sunscraper_finalize()
|
36
36
|
{
|
37
|
-
|
37
|
+
SunscraperWorker::commitSuicide();
|
38
38
|
}
|
39
39
|
}
|
@@ -1,12 +1,12 @@
|
|
1
1
|
#include "sunscraperlibrary.h"
|
2
|
-
#include "sunscraperthread.h"
|
2
|
+
#include "sunscraperworker.h"
|
3
3
|
#include <QtDebug>
|
4
4
|
|
5
5
|
SunscraperLibrary SunscraperLibrary::m_instance;
|
6
6
|
|
7
7
|
SunscraperLibrary::SunscraperLibrary()
|
8
8
|
{
|
9
|
-
|
9
|
+
SunscraperWorker::invoke();
|
10
10
|
}
|
11
11
|
|
12
12
|
SunscraperLibrary::~SunscraperLibrary()
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#include "sunscraperwebpage.h"
|
2
|
+
|
3
|
+
SunscraperWebPage::SunscraperWebPage(QObject *parent) :
|
4
|
+
QWebPage(parent)
|
5
|
+
{
|
6
|
+
}
|
7
|
+
|
8
|
+
void SunscraperWebPage::javaScriptConsoleMessage(const QString &message,
|
9
|
+
int lineNumber, const QString &sourceID)
|
10
|
+
{
|
11
|
+
emit consoleMessage(QString("%1:%2> %3").arg(sourceID).arg(lineNumber).arg(message));
|
12
|
+
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#ifndef SUNSCRAPERWEBPAGE_H
|
2
|
+
#define SUNSCRAPERWEBPAGE_H
|
3
|
+
|
4
|
+
#include <QWebPage>
|
5
|
+
|
6
|
+
class SunscraperWebPage : public QWebPage
|
7
|
+
{
|
8
|
+
Q_OBJECT
|
9
|
+
|
10
|
+
public:
|
11
|
+
SunscraperWebPage(QObject *parent = 0);
|
12
|
+
|
13
|
+
signals:
|
14
|
+
void consoleMessage(QString message);
|
15
|
+
|
16
|
+
protected:
|
17
|
+
virtual void javaScriptConsoleMessage(const QString & message, int lineNumber, const QString & sourceID);
|
18
|
+
};
|
19
|
+
|
20
|
+
#endif /* SUNSCRAPERWEBPAGE_H */
|
@@ -2,23 +2,25 @@
|
|
2
2
|
#include <QWebPage>
|
3
3
|
#include <QWebFrame>
|
4
4
|
#include <QTimer>
|
5
|
-
#include
|
5
|
+
#include <QWebView>
|
6
|
+
#include "sunscraperworker.h"
|
7
|
+
#include "sunscraperwebpage.h"
|
6
8
|
#include "sunscraperproxy.h"
|
7
9
|
#include <QtDebug>
|
8
10
|
#include <time.h>
|
9
11
|
|
10
12
|
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
11
|
-
pthread_t SunscraperThread::m_thread;
|
13
|
+
pthread_t SunscraperWorker::m_thread;
|
12
14
|
#endif
|
13
15
|
|
14
|
-
|
15
|
-
QSemaphore SunscraperThread::m_initializationLock;
|
16
|
+
SunscraperWorker *SunscraperWorker::m_instance;
|
17
|
+
QSemaphore SunscraperWorker::m_initializationLock;
|
16
18
|
|
17
|
-
|
19
|
+
SunscraperWorker::SunscraperWorker()
|
18
20
|
{
|
19
21
|
}
|
20
22
|
|
21
|
-
|
23
|
+
SunscraperWorker *SunscraperWorker::instance()
|
22
24
|
{
|
23
25
|
m_initializationLock.acquire(1);
|
24
26
|
m_initializationLock.release(1);
|
@@ -26,14 +28,14 @@ SunscraperThread *SunscraperThread::instance()
|
|
26
28
|
return m_instance;
|
27
29
|
}
|
28
30
|
|
29
|
-
void SunscraperThread::invoke()
|
31
|
+
void SunscraperWorker::invoke()
|
30
32
|
{
|
31
33
|
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
32
|
-
pthread_create(&m_thread, NULL, &SunscraperThread::thread_routine, NULL);
|
34
|
+
pthread_create(&m_thread, NULL, &SunscraperWorker::thread_routine, NULL);
|
33
35
|
#endif
|
34
36
|
}
|
35
37
|
|
36
|
-
void *SunscraperThread::thread_routine(void *)
|
38
|
+
void *SunscraperWorker::thread_routine(void *)
|
37
39
|
{
|
38
40
|
/* Better error messages. */
|
39
41
|
int argc = 1;
|
@@ -49,9 +51,9 @@ void *SunscraperThread::thread_routine(void *)
|
|
49
51
|
QApplication app(argc, argv);
|
50
52
|
|
51
53
|
if(m_instance != NULL)
|
52
|
-
qFatal("Attempt to invoke SunscraperThread more than once");
|
54
|
+
qFatal("Attempt to invoke SunscraperWorker more than once");
|
53
55
|
|
54
|
-
m_instance = new SunscraperThread();
|
56
|
+
m_instance = new SunscraperWorker();
|
55
57
|
m_initializationLock.release(1);
|
56
58
|
|
57
59
|
/* The magic value 42 means we want exit from the loop. */
|
@@ -62,7 +64,7 @@ void *SunscraperThread::thread_routine(void *)
|
|
62
64
|
return NULL;
|
63
65
|
}
|
64
66
|
|
65
|
-
void SunscraperThread::commitSuicide()
|
67
|
+
void SunscraperWorker::commitSuicide()
|
66
68
|
{
|
67
69
|
QApplication::exit(42);
|
68
70
|
|
@@ -71,19 +73,19 @@ void SunscraperThread::commitSuicide()
|
|
71
73
|
#endif
|
72
74
|
}
|
73
75
|
|
74
|
-
void SunscraperThread::loadHtml(unsigned queryId, QString html)
|
76
|
+
void SunscraperWorker::loadHtml(unsigned queryId, QString html)
|
75
77
|
{
|
76
78
|
QWebPage *webPage = initializeWebPage(queryId);
|
77
79
|
webPage->mainFrame()->setHtml(html);
|
78
80
|
}
|
79
81
|
|
80
|
-
void SunscraperThread::loadUrl(unsigned queryId, QString url)
|
82
|
+
void SunscraperWorker::loadUrl(unsigned queryId, QString url)
|
81
83
|
{
|
82
84
|
QWebPage *webPage = initializeWebPage(queryId);
|
83
85
|
webPage->mainFrame()->load(url);
|
84
86
|
}
|
85
87
|
|
86
|
-
void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
|
88
|
+
void SunscraperWorker::setTimeout(unsigned queryId, unsigned timeout)
|
87
89
|
{
|
88
90
|
Q_ASSERT(m_timers[queryId] == NULL);
|
89
91
|
|
@@ -97,7 +99,7 @@ void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
|
|
97
99
|
m_timers[queryId] = timer;
|
98
100
|
}
|
99
101
|
|
100
|
-
void SunscraperThread::finalize(unsigned queryId)
|
102
|
+
void SunscraperWorker::finalize(unsigned queryId)
|
101
103
|
{
|
102
104
|
Q_ASSERT(m_webPages[queryId] != NULL);
|
103
105
|
|
@@ -110,20 +112,28 @@ void SunscraperThread::finalize(unsigned queryId)
|
|
110
112
|
}
|
111
113
|
}
|
112
114
|
|
113
|
-
QWebPage *SunscraperThread::initializeWebPage(unsigned queryId)
|
115
|
+
QWebPage *SunscraperWorker::initializeWebPage(unsigned queryId)
|
114
116
|
{
|
115
117
|
Q_ASSERT(m_webPages[queryId] == NULL);
|
116
118
|
|
117
|
-
|
118
|
-
|
119
|
-
|
119
|
+
SunscraperWebPage *webPage = new SunscraperWebPage(this);
|
120
|
+
webPage->settings()->setAttribute(QWebSettings::LocalStorageEnabled, true);
|
121
|
+
|
122
|
+
connect(webPage, SIGNAL(frameCreated(QWebFrame*)), this, SLOT(attachFrame(QWebFrame*)));
|
123
|
+
connect(webPage, SIGNAL(consoleMessage(QString)), this, SLOT(routeMessage(QString)));
|
120
124
|
|
121
125
|
m_webPages[queryId] = webPage;
|
122
126
|
|
123
127
|
return webPage;
|
124
128
|
}
|
125
129
|
|
126
|
-
void SunscraperThread::attachAPI()
|
130
|
+
void SunscraperWorker::attachFrame(QWebFrame *frame)
|
131
|
+
{
|
132
|
+
connect(frame, SIGNAL(javaScriptWindowObjectCleared()),
|
133
|
+
this, SLOT(attachAPI()));
|
134
|
+
}
|
135
|
+
|
136
|
+
void SunscraperWorker::attachAPI()
|
127
137
|
{
|
128
138
|
QWebFrame *origin = static_cast<QWebFrame *>(QObject::sender());
|
129
139
|
QWebPage *page = origin->page();
|
@@ -137,7 +147,7 @@ void SunscraperThread::attachAPI()
|
|
137
147
|
origin->addToJavaScriptWindowObject("Sunscraper", proxy, QScriptEngine::QtOwnership);
|
138
148
|
}
|
139
149
|
|
140
|
-
void SunscraperThread::routeTimeout()
|
150
|
+
void SunscraperWorker::routeTimeout()
|
141
151
|
{
|
142
152
|
QTimer *origin = static_cast<QTimer *>(QObject::sender());
|
143
153
|
|
@@ -146,3 +156,8 @@ void SunscraperThread::routeTimeout()
|
|
146
156
|
|
147
157
|
emit timeout(queryId);
|
148
158
|
}
|
159
|
+
|
160
|
+
void SunscraperWorker::routeMessage(QString message)
|
161
|
+
{
|
162
|
+
qDebug() << "Sunscraper Console:" << message;
|
163
|
+
}
|
@@ -1,20 +1,22 @@
|
|
1
|
-
#ifndef SUNSCRAPERTHREAD_H
|
2
|
-
#define SUNSCRAPERTHREAD_H
|
1
|
+
#ifndef SUNSCRAPERWORKER_H
|
2
|
+
#define SUNSCRAPERWORKER_H
|
3
3
|
|
4
4
|
#include <QObject>
|
5
5
|
#include <QSemaphore>
|
6
6
|
#include <QMap>
|
7
|
+
#include <QUrl>
|
7
8
|
|
8
9
|
class QWebPage;
|
10
|
+
class QWebFrame;
|
9
11
|
class QTimer;
|
10
12
|
|
11
|
-
class SunscraperThread : public QObject
|
13
|
+
class SunscraperWorker : public QObject
|
12
14
|
{
|
13
15
|
Q_OBJECT
|
14
16
|
public:
|
15
17
|
static void invoke();
|
16
18
|
static void commitSuicide();
|
17
|
-
static SunscraperThread *instance();
|
19
|
+
static SunscraperWorker *instance();
|
18
20
|
|
19
21
|
signals:
|
20
22
|
void finished(unsigned queryId, QString result);
|
@@ -27,17 +29,19 @@ public slots:
|
|
27
29
|
void finalize(unsigned queryId);
|
28
30
|
|
29
31
|
private slots:
|
32
|
+
void attachFrame(QWebFrame *frame);
|
30
33
|
void attachAPI();
|
31
34
|
void routeTimeout();
|
35
|
+
void routeMessage(QString message);
|
32
36
|
|
33
37
|
private:
|
34
|
-
static SunscraperThread *m_instance;
|
38
|
+
static SunscraperWorker *m_instance;
|
35
39
|
static QSemaphore m_initializationLock;
|
36
40
|
|
37
41
|
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
38
42
|
static pthread_t m_thread;
|
39
43
|
#else
|
40
|
-
#error Your platform is unsupported. Implement SunscraperThread::invoke() and send a pull request.
|
44
|
+
#error Your platform is unsupported. Implement SunscraperWorker::invoke() and send a pull request.
|
41
45
|
#endif
|
42
46
|
|
43
47
|
static void *thread_routine(void *arg);
|
@@ -45,10 +49,10 @@ private:
|
|
45
49
|
QMap<unsigned, QWebPage *> m_webPages;
|
46
50
|
QMap<unsigned, QTimer *> m_timers;
|
47
51
|
|
48
|
-
|
49
|
-
|
52
|
+
SunscraperWorker();
|
53
|
+
SunscraperWorker(SunscraperWorker &);
|
50
54
|
|
51
55
|
QWebPage *initializeWebPage(unsigned queryId);
|
52
56
|
};
|
53
57
|
|
54
|
-
#endif // SUNSCRAPERTHREAD_H
|
58
|
+
#endif // SUNSCRAPERWORKER_H
|
data/ext/extconf.rb
CHANGED
@@ -1,22 +1,31 @@
|
|
1
1
|
# This Makefile will get replaced by qmake.
|
2
2
|
|
3
|
-
if
|
4
|
-
|
5
|
-
|
3
|
+
if RUBY_PLATFORM =~ /darwin/i || RbConfig::CONFIG['target_os'] == 'darwin'
|
4
|
+
# Cannot you OS X have a build system like all sane people?
|
5
|
+
# Win32 wins again.
|
6
6
|
qmake = %{qmake -spec macx-g++}
|
7
|
-
else
|
8
|
-
qmake = %{qmake}
|
9
|
-
end
|
10
7
|
|
11
|
-
File.open("Makefile", "w") do |mf|
|
12
|
-
|
8
|
+
File.open("Makefile", "w") do |mf|
|
9
|
+
mf.puts <<-ENDM
|
13
10
|
all:
|
14
|
-
cd embed
|
15
|
-
cd standalone
|
16
|
-
make -C embed
|
17
|
-
make -C standalone
|
18
|
-
|
11
|
+
(cd embed && #{qmake}; make)
|
12
|
+
(cd standalone && #{qmake}; make)
|
19
13
|
install:
|
20
14
|
# do nothing
|
21
|
-
|
22
|
-
end
|
15
|
+
ENDM
|
16
|
+
end
|
17
|
+
else
|
18
|
+
if Gem.win_platform?
|
19
|
+
qmake = %{qmake -spec win32-g++}
|
20
|
+
else
|
21
|
+
qmake = %{qmake}
|
22
|
+
end
|
23
|
+
|
24
|
+
File.open("Makefile", "w") do |mf|
|
25
|
+
mf.puts <<-ENDM
|
26
|
+
all:
|
27
|
+
#{qmake}
|
28
|
+
make
|
29
|
+
ENDM
|
30
|
+
end
|
31
|
+
end
|
File without changes
|
@@ -35,6 +35,8 @@ QWebPage *SunscraperWorker::initializeWebPage(unsigned queryId)
|
|
35
35
|
Q_ASSERT(_webPages[queryId] == NULL);
|
36
36
|
|
37
37
|
QWebPage *webPage = new QWebPage(this);
|
38
|
+
webPage->settings()->setAttribute(QWebSettings::LocalStorageEnabled, true);
|
39
|
+
|
38
40
|
connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
|
39
41
|
this, SLOT(attachAPI()));
|
40
42
|
|
data/sunscraper.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "sunscraper"
|
6
|
-
s.version = "1.1.0.beta2"
|
6
|
+
s.version = "1.1.0.beta3"
|
7
7
|
s.authors = ["Peter Zotov"]
|
8
8
|
s.email = ["whitequark@whitequark.org"]
|
9
9
|
s.homepage = "http://github.com/whitequark/sunscraper"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sunscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0.beta2
|
4
|
+
version: 1.1.0.beta3
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-03-
|
12
|
+
date: 2012-03-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &81880600 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *81880600
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: ffi
|
27
|
-
requirement: &
|
27
|
+
requirement: &81880200 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
version: 1.0.11
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *81880200
|
36
36
|
description: A WebKit-based, JavaScript-capable HTML scraper.
|
37
37
|
email:
|
38
38
|
- whitequark@whitequark.org
|
@@ -50,18 +50,20 @@ files:
|
|
50
50
|
- README.md
|
51
51
|
- Rakefile
|
52
52
|
- ext/.gitignore
|
53
|
+
- ext/embed/embed.pro
|
53
54
|
- ext/embed/sunscraper.cpp
|
54
55
|
- ext/embed/sunscraper.h
|
55
|
-
- ext/embed/sunscraper.pro
|
56
56
|
- ext/embed/sunscraperexternal.cpp
|
57
57
|
- ext/embed/sunscraperlibrary.cpp
|
58
58
|
- ext/embed/sunscraperlibrary.h
|
59
59
|
- ext/embed/sunscraperproxy.cpp
|
60
60
|
- ext/embed/sunscraperproxy.h
|
61
|
-
- ext/embed/sunscraperthread.cpp
|
62
|
-
- ext/embed/sunscraperthread.h
|
61
|
+
- ext/embed/sunscraperwebpage.cpp
|
62
|
+
- ext/embed/sunscraperwebpage.h
|
63
|
+
- ext/embed/sunscraperworker.cpp
|
64
|
+
- ext/embed/sunscraperworker.h
|
63
65
|
- ext/extconf.rb
|
64
|
-
- ext/standalone/sunscraper.pro
|
66
|
+
- ext/standalone/standalone.pro
|
65
67
|
- ext/standalone/sunscrapermain.cpp
|
66
68
|
- ext/standalone/sunscraperproxy.cpp
|
67
69
|
- ext/standalone/sunscraperproxy.h
|
@@ -69,6 +71,7 @@ files:
|
|
69
71
|
- ext/standalone/sunscraperrpc.h
|
70
72
|
- ext/standalone/sunscraperworker.cpp
|
71
73
|
- ext/standalone/sunscraperworker.h
|
74
|
+
- ext/sunscraper-ext.pro
|
72
75
|
- lib/sunscraper.rb
|
73
76
|
- lib/sunscraper/library.rb
|
74
77
|
- lib/sunscraper/standalone.rb
|
@@ -100,4 +103,3 @@ signing_key:
|
|
100
103
|
specification_version: 3
|
101
104
|
summary: A WebKit-based, JavaScript-capable HTML scraper.
|
102
105
|
test_files: []
|
103
|
-
has_rdoc:
|