sunscraper 1.0.0 → 1.1.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +28 -0
- data/README.md +42 -20
- data/ext/.gitignore +5 -1
- data/ext/embed/sunscraper.cpp +92 -0
- data/ext/{sunscraper.h → embed/sunscraper.h} +9 -7
- data/ext/{sunscraper.pro → embed/sunscraper.pro} +2 -0
- data/ext/embed/sunscraperexternal.cpp +39 -0
- data/ext/{sunscraperlibrary.cpp → embed/sunscraperlibrary.cpp} +4 -9
- data/ext/{sunscraperlibrary.h → embed/sunscraperlibrary.h} +1 -5
- data/ext/embed/sunscraperproxy.cpp +14 -0
- data/ext/{sunscraperproxy.h → embed/sunscraperproxy.h} +3 -3
- data/ext/embed/sunscraperthread.cpp +148 -0
- data/ext/embed/sunscraperthread.h +54 -0
- data/ext/extconf.rb +13 -3
- data/ext/standalone/sunscraper.pro +13 -0
- data/ext/standalone/sunscrapermain.cpp +13 -0
- data/ext/{sunscraperproxy.cpp → standalone/sunscraperproxy.cpp} +2 -2
- data/ext/standalone/sunscraperproxy.h +24 -0
- data/ext/standalone/sunscraperrpc.cpp +183 -0
- data/ext/standalone/sunscraperrpc.h +64 -0
- data/ext/{sunscraperthread.cpp → standalone/sunscraperworker.cpp} +9 -18
- data/ext/{sunscraperthread.h → standalone/sunscraperworker.h} +8 -8
- data/lib/sunscraper/library.rb +33 -29
- data/lib/sunscraper/standalone.rb +168 -0
- data/lib/sunscraper.rb +48 -11
- data/spec/sunscraper_spec.rb +59 -13
- data/sunscraper.gemspec +2 -2
- metadata +58 -75
- data/ext/Makefile +0 -270
- data/ext/sunscraper.cpp +0 -86
- data/ext/sunscraperexternal.cpp +0 -33
data/.travis.yml
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
language: ruby
|
2
|
+
install:
|
3
|
+
- "sudo apt-get install qt4-dev-tools --no-install-recommends"
|
4
|
+
- "(cd ext/; ruby extconf.rb; make)"
|
5
|
+
- "bundle install"
|
6
|
+
script:
|
7
|
+
- "xvfb-run bundle exec rspec"
|
8
|
+
rvm:
|
9
|
+
- 1.9.2
|
10
|
+
- 1.9.3
|
11
|
+
- jruby-19mode
|
12
|
+
- jruby-head
|
13
|
+
- rbx-19mode
|
14
|
+
env:
|
15
|
+
- EXPERIMENTAL=true
|
16
|
+
- EXPERIMENTAL=false
|
17
|
+
matrix:
|
18
|
+
exclude:
|
19
|
+
- rvm: 1.9.2
|
20
|
+
env: EXPERIMENTAL=true
|
21
|
+
- rvm: 1.9.3
|
22
|
+
env: EXPERIMENTAL=true
|
23
|
+
allow_failures:  # Travis CI's matrix key for jobs that may fail without failing the build
|
24
|
+
- env: EXPERIMENTAL=true
|
25
|
+
notifications:
|
26
|
+
email:
|
27
|
+
- boris@roundlake.ru
|
28
|
+
- p.zotov@roundlake.ru
|
data/README.md
CHANGED
@@ -6,24 +6,26 @@ Sunscraper is a gem for prerendering pages with hashbang URLs like `http://whate
|
|
6
6
|
It works by loading content in the embedded web browser and waiting for a JavaScript method to be
|
7
7
|
called.
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
9
|
+
``` ruby
|
10
|
+
HTML = %{
|
11
|
+
<html>
|
12
|
+
<head>
|
13
|
+
<script type="text/javascript">
|
14
|
+
document.addEventListener("DOMContentLoaded", function() {
|
15
|
+
document.getElementById('fuga').textContent =
|
16
|
+
("!skrow tI").split("").reverse().join("");
|
17
|
+
Sunscraper.finish();
|
18
|
+
}, true);
|
19
|
+
</script>
|
20
|
+
</head>
|
21
|
+
<body>
|
22
|
+
<div id='fuga'></div>
|
23
|
+
</body>
|
24
|
+
</html>
|
25
|
+
}
|
26
|
+
|
27
|
+
Sunscraper.scrape_html(HTML).include?('It works!') # => true
|
28
|
+
```
|
27
29
|
|
28
30
|
See also [documentation][].
|
29
31
|
|
@@ -42,7 +44,7 @@ C extension*; it works by building a Qt shared library and loading it through [F
|
|
42
44
|
Runtime requirements
|
43
45
|
--------------------
|
44
46
|
|
45
|
-
On Linux
|
47
|
+
On Linux, Sunscraper requires a running X server and a valid `DISPLAY` environment
|
46
48
|
variable. Consider using [Xvfb][] on a GUI-less production server.
|
47
49
|
|
48
50
|
[Xvfb]: http://www.x.org/releases/X11R7.6/doc/man/man1/Xvfb.1.xhtml
|
@@ -50,6 +52,8 @@ variable. Consider using [Xvfb][] on a GUI-less production server.
|
|
50
52
|
Compatibility
|
51
53
|
-------------
|
52
54
|
|
55
|
+

|
56
|
+
|
53
57
|
Sunscraper should be compatible across all major implementations on all major operating systems, including
|
54
58
|
Ruby MRI 1.9, JRuby, Rubinius and MacRuby running on GNU/Linux, OS X and Windows.
|
55
59
|
|
@@ -65,4 +69,22 @@ Sunscraper is thread-safe.
|
|
65
69
|
License
|
66
70
|
-------
|
67
71
|
|
68
|
-
|
72
|
+
Copyright (C) 2011 by Peter Zotov <p.zotov@roundlake.ru>.
|
73
|
+
|
74
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
75
|
+
of this software and associated documentation files (the "Software"), to deal
|
76
|
+
in the Software without restriction, including without limitation the rights
|
77
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
78
|
+
copies of the Software, and to permit persons to whom the Software is
|
79
|
+
furnished to do so, subject to the following conditions:
|
80
|
+
|
81
|
+
The above copyright notice and this permission notice shall be included in
|
82
|
+
all copies or substantial portions of the Software.
|
83
|
+
|
84
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
85
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
86
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
87
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
88
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
89
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
90
|
+
THE SOFTWARE.
|
data/ext/.gitignore
CHANGED
@@ -0,0 +1,92 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QThread>
|
3
|
+
#include <QTimer>
|
4
|
+
#include <QWebPage>
|
5
|
+
#include <QWebFrame>
|
6
|
+
#include <QMutexLocker>
|
7
|
+
#include <QEventLoop>
|
8
|
+
#include <QtDebug>
|
9
|
+
#include "sunscraper.h"
|
10
|
+
#include "sunscraperlibrary.h"
|
11
|
+
#include "sunscraperthread.h"
|
12
|
+
|
13
|
+
// Id handed to the next Sunscraper constructed; starts at 1 and is
// post-incremented by the constructor.
unsigned Sunscraper::m_nextQueryId = 1;
|
14
|
+
// Locked for the duration of the Sunscraper constructor, which is where
// m_nextQueryId is read and incremented.
QMutex Sunscraper::m_staticMutex;
|
15
|
+
|
16
|
+
/**
 * Creates a scraper handle and wires it to the shared SunscraperThread worker.
 *
 * Allocates a unique query id under m_staticMutex, then connects this
 * object's request signals to the worker's slots and the worker's
 * finished/timeout signals back to this object. Every connection is queued,
 * so each slot runs in the receiving object's thread.
 *
 * Aborts via qFatal() if the worker instance is not available.
 */
Sunscraper::Sunscraper()
{
    // Held until the constructor returns; guards m_nextQueryId.
    QMutexLocker locker(&m_staticMutex);

    m_queryId = m_nextQueryId++;

    SunscraperThread *worker = SunscraperThread::instance();
    if(worker == NULL)
        qFatal("Attempt to run Sunscraper before thread initialization");

    // Requests: this object -> worker.
    connect(this, SIGNAL(requestLoadHtml(uint,QString)),
            worker, SLOT(loadHtml(uint,QString)), Qt::QueuedConnection);
    connect(this, SIGNAL(requestLoadUrl(uint,QString)),
            worker, SLOT(loadUrl(uint,QString)), Qt::QueuedConnection);
    connect(this, SIGNAL(requestFinalize(uint)),
            worker, SLOT(finalize(uint)), Qt::QueuedConnection);
    connect(this, SIGNAL(requestTimeout(uint,uint)),
            worker, SLOT(setTimeout(uint,uint)), Qt::QueuedConnection);

    // Results: worker -> this object. The worker broadcasts to every
    // Sunscraper; the slots filter on the query id.
    connect(worker, SIGNAL(finished(uint,QString)),
            this, SLOT(finished(uint,QString)), Qt::QueuedConnection);
    connect(worker, SIGNAL(timeout(uint)),
            this, SLOT(timeout(uint)), Qt::QueuedConnection);

    // Parent the loop to this object so it is destroyed together with it
    // instead of being leaked when the handle is deleted.
    m_eventLoop = new QEventLoop(this);
}
|
42
|
+
|
43
|
+
/**
 * Asks the worker to load the given HTML markup for this handle's query.
 *
 * Only emits requestLoadHtml(); the actual loading happens in the slot
 * connected to that signal.
 */
void Sunscraper::loadHtml(QString html)
{
    const unsigned id = m_queryId;
    emit requestLoadHtml(id, html);
}
|
47
|
+
|
48
|
+
/**
 * Asks the worker to load the page at the given URL for this handle's query.
 *
 * Only emits requestLoadUrl(); the actual loading happens in the slot
 * connected to that signal.
 */
void Sunscraper::loadUrl(QString url)
{
    const unsigned id = m_queryId;
    emit requestLoadUrl(id, url);
}
|
52
|
+
|
53
|
+
void Sunscraper::wait(unsigned timeout)
|
54
|
+
{
|
55
|
+
emit requestTimeout(m_queryId, timeout);
|
56
|
+
|
57
|
+
m_eventLoop->exec();
|
58
|
+
}
|
59
|
+
|
60
|
+
void Sunscraper::finished(unsigned eventQueryId, QString html)
|
61
|
+
{
|
62
|
+
if(eventQueryId != m_queryId)
|
63
|
+
return;
|
64
|
+
|
65
|
+
m_eventLoop->quit();
|
66
|
+
|
67
|
+
m_html = html.toUtf8();
|
68
|
+
|
69
|
+
emit requestFinalize(m_queryId);
|
70
|
+
}
|
71
|
+
|
72
|
+
void Sunscraper::timeout(unsigned eventQueryId)
|
73
|
+
{
|
74
|
+
if(eventQueryId != m_queryId)
|
75
|
+
return;
|
76
|
+
|
77
|
+
m_eventLoop->quit();
|
78
|
+
|
79
|
+
m_html = "!SUNSCRAPER_TIMEOUT";
|
80
|
+
|
81
|
+
emit requestFinalize(m_queryId);
|
82
|
+
}
|
83
|
+
|
84
|
+
/**
 * Returns a copy of the stored result: either the UTF-8 encoded page HTML
 * or the timeout marker, depending on which result slot ran last.
 */
QByteArray Sunscraper::fetch()
{
    QByteArray result(m_html);
    return result;
}
|
88
|
+
|
89
|
+
const char *Sunscraper::fetchAsCString()
|
90
|
+
{
|
91
|
+
return m_html.constData();
|
92
|
+
}
|
@@ -5,9 +5,9 @@
|
|
5
5
|
#include <QString>
|
6
6
|
#include <QMutex>
|
7
7
|
#include <QByteArray>
|
8
|
-
#include <QEventLoop>
|
9
8
|
|
10
9
|
class QWebPage;
|
10
|
+
class QEventLoop;
|
11
11
|
|
12
12
|
class Sunscraper : public QObject
|
13
13
|
{
|
@@ -26,20 +26,22 @@ public:
|
|
26
26
|
|
27
27
|
private slots:
|
28
28
|
void finished(unsigned queryId, QString html);
|
29
|
-
void timeout();
|
29
|
+
void timeout(unsigned queryId);
|
30
30
|
|
31
31
|
signals:
|
32
32
|
void requestLoadHtml(unsigned queryId, QString html);
|
33
33
|
void requestLoadUrl(unsigned queryId, QString html);
|
34
|
+
void requestTimeout(unsigned queryId, unsigned timeout);
|
34
35
|
void requestFinalize(unsigned queryId);
|
35
36
|
|
36
37
|
private:
|
37
|
-
static unsigned
|
38
|
-
static QMutex
|
38
|
+
static unsigned m_nextQueryId;
|
39
|
+
static QMutex m_staticMutex;
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
|
41
|
+
QEventLoop *m_eventLoop;
|
42
|
+
|
43
|
+
unsigned m_queryId;
|
44
|
+
QByteArray m_html;
|
43
45
|
};
|
44
46
|
|
45
47
|
#endif // SUNSCRAPER_H
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#include "sunscraper.h"
|
2
|
+
#include "sunscraperthread.h"
|
3
|
+
|
4
|
+
extern "C" {
|
5
|
+
Sunscraper *sunscraper_create()
|
6
|
+
{
|
7
|
+
return new Sunscraper();
|
8
|
+
}
|
9
|
+
|
10
|
+
void sunscraper_load_html(Sunscraper *sunscraper, const char *html)
|
11
|
+
{
|
12
|
+
sunscraper->loadHtml(html);
|
13
|
+
}
|
14
|
+
|
15
|
+
void sunscraper_load_url(Sunscraper *sunscraper, const char *url)
|
16
|
+
{
|
17
|
+
sunscraper->loadUrl(url);
|
18
|
+
}
|
19
|
+
|
20
|
+
void sunscraper_wait(Sunscraper *sunscraper, unsigned timeout)
|
21
|
+
{
|
22
|
+
sunscraper->wait(timeout);
|
23
|
+
}
|
24
|
+
|
25
|
+
const char *sunscraper_fetch(Sunscraper *sunscraper)
|
26
|
+
{
|
27
|
+
return sunscraper->fetchAsCString();
|
28
|
+
}
|
29
|
+
|
30
|
+
void sunscraper_discard(Sunscraper *sunscraper)
|
31
|
+
{
|
32
|
+
delete sunscraper;
|
33
|
+
}
|
34
|
+
|
35
|
+
void sunscraper_finalize()
|
36
|
+
{
|
37
|
+
SunscraperThread::commitSuicide();
|
38
|
+
}
|
39
|
+
}
|
@@ -1,12 +1,12 @@
|
|
1
1
|
#include "sunscraperlibrary.h"
|
2
2
|
#include "sunscraperthread.h"
|
3
|
+
#include <QtDebug>
|
3
4
|
|
4
|
-
SunscraperLibrary SunscraperLibrary::
|
5
|
+
SunscraperLibrary SunscraperLibrary::m_instance;
|
5
6
|
|
6
7
|
SunscraperLibrary::SunscraperLibrary()
|
7
8
|
{
|
8
|
-
|
9
|
-
_apartmentThread->start();
|
9
|
+
SunscraperThread::invoke();
|
10
10
|
}
|
11
11
|
|
12
12
|
SunscraperLibrary::~SunscraperLibrary()
|
@@ -16,10 +16,5 @@ SunscraperLibrary::~SunscraperLibrary()
|
|
16
16
|
|
17
17
|
SunscraperLibrary *SunscraperLibrary::instance()
|
18
18
|
{
|
19
|
-
return &
|
20
|
-
}
|
21
|
-
|
22
|
-
SunscraperThread *SunscraperLibrary::thread()
|
23
|
-
{
|
24
|
-
return _apartmentThread;
|
19
|
+
return &m_instance;
|
25
20
|
}
|
@@ -7,16 +7,12 @@ class SunscraperLibrary {
|
|
7
7
|
public:
|
8
8
|
static SunscraperLibrary *instance();
|
9
9
|
|
10
|
-
SunscraperThread *thread();
|
11
|
-
|
12
10
|
private:
|
13
11
|
SunscraperLibrary();
|
14
12
|
SunscraperLibrary(SunscraperLibrary &);
|
15
13
|
~SunscraperLibrary();
|
16
14
|
|
17
|
-
static SunscraperLibrary
|
18
|
-
|
19
|
-
SunscraperThread *_apartmentThread;
|
15
|
+
static SunscraperLibrary m_instance;
|
20
16
|
};
|
21
17
|
|
22
18
|
#endif // SUNSCRAPER_H
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#include <QWebPage>
|
2
|
+
#include <QWebFrame>
|
3
|
+
#include <QtDebug>
|
4
|
+
#include "sunscraperproxy.h"
|
5
|
+
|
6
|
+
/**
 * Creates a proxy owned by (parented to) the given page and remembers the
 * page together with the query id it belongs to.
 */
SunscraperProxy::SunscraperProxy(QWebPage *parent, unsigned queryId)
    : QObject(parent)
    , m_webPage(parent)
    , m_queryId(queryId)
{
}
|
10
|
+
|
11
|
+
void SunscraperProxy::finish()
|
12
|
+
{
|
13
|
+
emit finished(m_queryId, m_webPage->mainFrame()->toHtml());
|
14
|
+
}
|
@@ -14,11 +14,11 @@ public:
|
|
14
14
|
Q_INVOKABLE void finish();
|
15
15
|
|
16
16
|
signals:
|
17
|
-
void finished(unsigned
|
17
|
+
void finished(unsigned queryId, QString html);
|
18
18
|
|
19
19
|
private:
|
20
|
-
QWebPage *
|
21
|
-
unsigned
|
20
|
+
QWebPage *m_webPage;
|
21
|
+
unsigned m_queryId;
|
22
22
|
};
|
23
23
|
|
24
24
|
#endif // SUNSCRAPERPROXY_H
|
@@ -0,0 +1,148 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QWebPage>
|
3
|
+
#include <QWebFrame>
|
4
|
+
#include <QTimer>
|
5
|
+
#include "sunscraperthread.h"
|
6
|
+
#include "sunscraperproxy.h"
|
7
|
+
#include <QtDebug>
|
8
|
+
#include <time.h>
|
9
|
+
|
10
|
+
// Handle of the worker thread; written by invoke() through pthread_create()
// and joined in commitSuicide().
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
11
|
+
pthread_t SunscraperThread::m_thread;
|
12
|
+
#endif
|
13
|
+
|
14
|
+
// The single worker object; assigned in thread_routine().
SunscraperThread *SunscraperThread::m_instance;
|
15
|
+
// Released by thread_routine() after m_instance is assigned; instance()
// acquires and immediately releases it before returning m_instance.
QSemaphore SunscraperThread::m_initializationLock;
|
16
|
+
|
17
|
+
/** Default constructor; performs no work of its own. */
SunscraperThread::SunscraperThread() {}
|
20
|
+
|
21
|
+
/**
 * Returns the worker object.
 *
 * Takes one permit from the initialization semaphore and gives it straight
 * back: the call blocks until thread_routine() has released the semaphore,
 * and the permit stays available for subsequent callers.
 */
SunscraperThread *SunscraperThread::instance()
{
    const int permits = 1;

    m_initializationLock.acquire(permits);
    m_initializationLock.release(permits);

    return m_instance;
}
|
28
|
+
|
29
|
+
/**
 * Starts the worker thread, which runs thread_routine().
 *
 * Aborts via qFatal() when the thread cannot be created: without the
 * thread nobody releases the initialization semaphore, so instance()
 * would otherwise block forever.
 */
void SunscraperThread::invoke()
{
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
    const int status = pthread_create(&m_thread, NULL, &SunscraperThread::thread_routine, NULL);
    if(status != 0)
        qFatal("Unable to start the Sunscraper worker thread (pthread_create returned %d)", status);
#endif
}
|
35
|
+
|
36
|
+
void *SunscraperThread::thread_routine(void *)
|
37
|
+
{
|
38
|
+
/* Better error messages. */
|
39
|
+
int argc = 1;
|
40
|
+
char *argv[] = { (char*) "Sunscraper", NULL};
|
41
|
+
|
42
|
+
/* Why (char*)? Because argv can (theoretically) be modified. *
|
43
|
+
* But Qt won't do that with argv[0]. I know, trust me. */
|
44
|
+
|
45
|
+
//qDebug() << "a";
|
46
|
+
//usleep(1000000);
|
47
|
+
//qDebug() << "b";
|
48
|
+
|
49
|
+
QApplication app(argc, argv);
|
50
|
+
|
51
|
+
if(m_instance != NULL)
|
52
|
+
qFatal("Attempt to invoke SunscraperThread more than once");
|
53
|
+
|
54
|
+
m_instance = new SunscraperThread();
|
55
|
+
m_initializationLock.release(1);
|
56
|
+
|
57
|
+
/* The magic value 42 means we want exit from the loop. */
|
58
|
+
while(app.exec() != 42);
|
59
|
+
|
60
|
+
/* Our application exits. */
|
61
|
+
|
62
|
+
return NULL;
|
63
|
+
}
|
64
|
+
|
65
|
+
void SunscraperThread::commitSuicide()
|
66
|
+
{
|
67
|
+
QApplication::exit(42);
|
68
|
+
|
69
|
+
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
70
|
+
pthread_join(m_thread, NULL);
|
71
|
+
#endif
|
72
|
+
}
|
73
|
+
|
74
|
+
void SunscraperThread::loadHtml(unsigned queryId, QString html)
|
75
|
+
{
|
76
|
+
QWebPage *webPage = initializeWebPage(queryId);
|
77
|
+
webPage->mainFrame()->setHtml(html);
|
78
|
+
}
|
79
|
+
|
80
|
+
void SunscraperThread::loadUrl(unsigned queryId, QString url)
|
81
|
+
{
|
82
|
+
QWebPage *webPage = initializeWebPage(queryId);
|
83
|
+
webPage->mainFrame()->load(url);
|
84
|
+
}
|
85
|
+
|
86
|
+
void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
|
87
|
+
{
|
88
|
+
Q_ASSERT(m_timers[queryId] == NULL);
|
89
|
+
|
90
|
+
QTimer *timer = new QTimer(this);
|
91
|
+
timer->setInterval(timeout);
|
92
|
+
timer->setSingleShot(true);
|
93
|
+
|
94
|
+
connect(timer, SIGNAL(timeout()), this, SLOT(routeTimeout()));
|
95
|
+
|
96
|
+
timer->start();
|
97
|
+
m_timers[queryId] = timer;
|
98
|
+
}
|
99
|
+
|
100
|
+
void SunscraperThread::finalize(unsigned queryId)
|
101
|
+
{
|
102
|
+
Q_ASSERT(m_webPages[queryId] != NULL);
|
103
|
+
|
104
|
+
m_webPages[queryId]->deleteLater();
|
105
|
+
m_webPages.remove(queryId);
|
106
|
+
|
107
|
+
if(m_timers.contains(queryId)) {
|
108
|
+
m_timers[queryId]->deleteLater();
|
109
|
+
m_timers.remove(queryId);
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
/**
 * Creates the QWebPage for a query, parents it to this object and records
 * it in m_webPages. attachAPI() is hooked to the main frame's
 * javaScriptWindowObjectCleared() signal.
 *
 * Returns the newly created page.
 */
QWebPage *SunscraperThread::initializeWebPage(unsigned queryId)
{
    // A query is expected to get at most one page.
    Q_ASSERT(m_webPages[queryId] == NULL);

    QWebPage *page = new QWebPage(this);
    m_webPages[queryId] = page;

    connect(page->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
            this, SLOT(attachAPI()));

    return page;
}
|
125
|
+
|
126
|
+
void SunscraperThread::attachAPI()
|
127
|
+
{
|
128
|
+
QWebFrame *origin = static_cast<QWebFrame *>(QObject::sender());
|
129
|
+
QWebPage *page = origin->page();
|
130
|
+
|
131
|
+
unsigned queryId = m_webPages.key(page, 0);
|
132
|
+
Q_ASSERT(queryId != 0);
|
133
|
+
|
134
|
+
SunscraperProxy *proxy = new SunscraperProxy(page, queryId);
|
135
|
+
connect(proxy, SIGNAL(finished(uint,QString)), this, SIGNAL(finished(uint,QString)));
|
136
|
+
|
137
|
+
origin->addToJavaScriptWindowObject("Sunscraper", proxy, QScriptEngine::QtOwnership);
|
138
|
+
}
|
139
|
+
|
140
|
+
void SunscraperThread::routeTimeout()
|
141
|
+
{
|
142
|
+
QTimer *origin = static_cast<QTimer *>(QObject::sender());
|
143
|
+
|
144
|
+
unsigned queryId = m_timers.key(origin, 0);
|
145
|
+
Q_ASSERT(queryId != 0);
|
146
|
+
|
147
|
+
emit timeout(queryId);
|
148
|
+
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#ifndef SUNSCRAPERTHREAD_H
|
2
|
+
#define SUNSCRAPERTHREAD_H
|
3
|
+
|
4
|
+
#include <QObject>
|
5
|
+
#include <QSemaphore>
|
6
|
+
#include <QMap>
|
7
|
+
|
8
|
+
class QWebPage;
|
9
|
+
class QTimer;
|
10
|
+
|
11
|
+
class SunscraperThread : public QObject
|
12
|
+
{
|
13
|
+
Q_OBJECT
|
14
|
+
public:
|
15
|
+
static void invoke();
|
16
|
+
static void commitSuicide();
|
17
|
+
static SunscraperThread *instance();
|
18
|
+
|
19
|
+
signals:
|
20
|
+
void finished(unsigned queryId, QString result);
|
21
|
+
void timeout(unsigned queryId);
|
22
|
+
|
23
|
+
public slots:
|
24
|
+
void loadHtml(unsigned queryId, QString html);
|
25
|
+
void loadUrl(unsigned queryId, QString url);
|
26
|
+
void setTimeout(unsigned queryId, unsigned timeout);
|
27
|
+
void finalize(unsigned queryId);
|
28
|
+
|
29
|
+
private slots:
|
30
|
+
void attachAPI();
|
31
|
+
void routeTimeout();
|
32
|
+
|
33
|
+
private:
|
34
|
+
static SunscraperThread *m_instance;
|
35
|
+
static QSemaphore m_initializationLock;
|
36
|
+
|
37
|
+
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
38
|
+
static pthread_t m_thread;
|
39
|
+
#else
|
40
|
+
#error Your platform is unsupported. Implement SunscraperThread::invoke() and send a pull request.
|
41
|
+
#endif
|
42
|
+
|
43
|
+
static void *thread_routine(void *arg);
|
44
|
+
|
45
|
+
QMap<unsigned, QWebPage *> m_webPages;
|
46
|
+
QMap<unsigned, QTimer *> m_timers;
|
47
|
+
|
48
|
+
SunscraperThread();
|
49
|
+
SunscraperThread(SunscraperThread &);
|
50
|
+
|
51
|
+
QWebPage *initializeWebPage(unsigned queryId);
|
52
|
+
};
|
53
|
+
|
54
|
+
#endif // SUNSCRAPERTHREAD_H
|
data/ext/extconf.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
1
|
# This Makefile will get replaced by qmake.
|
2
2
|
|
3
|
+
if Gem.win_platform?
|
4
|
+
qmake = %{qmake -spec win32-g++}
|
5
|
+
elsif RUBY_PLATFORM =~ /darwin/i || RbConfig::CONFIG['target_os'] == 'darwin'
|
6
|
+
qmake = %{qmake -spec macx-g++}
|
7
|
+
else
|
8
|
+
qmake = %{qmake}
|
9
|
+
end
|
10
|
+
|
3
11
|
File.open("Makefile", "w") do |mf|
|
4
12
|
mf.puts <<-ENDM
|
5
13
|
all:
|
6
|
-
qmake
|
7
|
-
|
14
|
+
cd embed; #{qmake}
|
15
|
+
cd standalone; #{qmake}
|
16
|
+
make -C embed
|
17
|
+
make -C standalone
|
8
18
|
ENDM
|
9
|
-
end
|
19
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QStringList>
|
3
|
+
#include "sunscraperworker.h"
|
4
|
+
#include "sunscraperrpc.h"
|
5
|
+
|
6
|
+
int main(int argc, char **argv)
|
7
|
+
{
|
8
|
+
QApplication app(argc, argv);
|
9
|
+
|
10
|
+
SunscraperRPC rpc(app.arguments().at(1));
|
11
|
+
|
12
|
+
return app.exec();
|
13
|
+
}
|
@@ -3,11 +3,11 @@
|
|
3
3
|
#include "sunscraperproxy.h"
|
4
4
|
|
5
5
|
SunscraperProxy::SunscraperProxy(QWebPage *parent, unsigned queryId) :
|
6
|
-
QObject(parent),
|
6
|
+
QObject(parent), m_webPage(parent), m_queryId(queryId)
|
7
7
|
{
|
8
8
|
}
|
9
9
|
|
10
10
|
void SunscraperProxy::finish()
|
11
11
|
{
|
12
|
-
emit finished(
|
12
|
+
emit finished(m_queryId, m_webPage->mainFrame()->toHtml());
|
13
13
|
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef SUNSCRAPERPROXY_H
|
2
|
+
#define SUNSCRAPERPROXY_H
|
3
|
+
|
4
|
+
#include <QObject>
|
5
|
+
|
6
|
+
class QWebPage;
|
7
|
+
|
8
|
+
class SunscraperProxy : public QObject
|
9
|
+
{
|
10
|
+
Q_OBJECT
|
11
|
+
public:
|
12
|
+
SunscraperProxy(QWebPage *parent, unsigned queryId);
|
13
|
+
|
14
|
+
Q_INVOKABLE void finish();
|
15
|
+
|
16
|
+
signals:
|
17
|
+
void finished(unsigned queryId, QString html);
|
18
|
+
|
19
|
+
private:
|
20
|
+
QWebPage *m_webPage;
|
21
|
+
unsigned m_queryId;
|
22
|
+
};
|
23
|
+
|
24
|
+
#endif // SUNSCRAPERPROXY_H
|