sunscraper 1.0.0 → 1.1.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +28 -0
- data/README.md +42 -20
- data/ext/.gitignore +5 -1
- data/ext/embed/sunscraper.cpp +92 -0
- data/ext/{sunscraper.h → embed/sunscraper.h} +9 -7
- data/ext/{sunscraper.pro → embed/sunscraper.pro} +2 -0
- data/ext/embed/sunscraperexternal.cpp +39 -0
- data/ext/{sunscraperlibrary.cpp → embed/sunscraperlibrary.cpp} +4 -9
- data/ext/{sunscraperlibrary.h → embed/sunscraperlibrary.h} +1 -5
- data/ext/embed/sunscraperproxy.cpp +14 -0
- data/ext/{sunscraperproxy.h → embed/sunscraperproxy.h} +3 -3
- data/ext/embed/sunscraperthread.cpp +148 -0
- data/ext/embed/sunscraperthread.h +54 -0
- data/ext/extconf.rb +13 -3
- data/ext/standalone/sunscraper.pro +13 -0
- data/ext/standalone/sunscrapermain.cpp +13 -0
- data/ext/{sunscraperproxy.cpp → standalone/sunscraperproxy.cpp} +2 -2
- data/ext/standalone/sunscraperproxy.h +24 -0
- data/ext/standalone/sunscraperrpc.cpp +183 -0
- data/ext/standalone/sunscraperrpc.h +64 -0
- data/ext/{sunscraperthread.cpp → standalone/sunscraperworker.cpp} +9 -18
- data/ext/{sunscraperthread.h → standalone/sunscraperworker.h} +8 -8
- data/lib/sunscraper/library.rb +33 -29
- data/lib/sunscraper/standalone.rb +168 -0
- data/lib/sunscraper.rb +48 -11
- data/spec/sunscraper_spec.rb +59 -13
- data/sunscraper.gemspec +2 -2
- metadata +58 -75
- data/ext/Makefile +0 -270
- data/ext/sunscraper.cpp +0 -86
- data/ext/sunscraperexternal.cpp +0 -33
data/.travis.yml
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
language: ruby
|
2
|
+
install:
|
3
|
+
- "sudo apt-get install qt4-dev-tools --no-install-recommends"
|
4
|
+
- "(cd ext/; ruby extconf.rb; make)"
|
5
|
+
- "bundle install"
|
6
|
+
script:
|
7
|
+
- "xvfb-run bundle exec rspec"
|
8
|
+
rvm:
|
9
|
+
- 1.9.2
|
10
|
+
- 1.9.3
|
11
|
+
- jruby-19mode
|
12
|
+
- jruby-head
|
13
|
+
- rbx-19mode
|
14
|
+
env:
|
15
|
+
- EXPERIMENTAL=true
|
16
|
+
- EXPERIMENTAL=false
|
17
|
+
matrix:
|
18
|
+
exclude:
|
19
|
+
- rvm: 1.9.2
|
20
|
+
env: EXPERIMENTAL=true
|
21
|
+
- rvm: 1.9.3
|
22
|
+
env: EXPERIMENTAL=true
|
23
|
+
# Travis CI only recognises `allow_failures` under `matrix`; `allow_fail` is ignored.
allow_failures:
|
24
|
+
- env: EXPERIMENTAL=true
|
25
|
+
notifications:
|
26
|
+
email:
|
27
|
+
- boris@roundlake.ru
|
28
|
+
- p.zotov@roundlake.ru
|
data/README.md
CHANGED
@@ -6,24 +6,26 @@ Sunscraper is a gem for prerendering pages with hashbang URLs like `http://whate
|
|
6
6
|
It works by loading content in the embedded web browser and waiting for a JavaScript method to be
|
7
7
|
called.
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
9
|
+
``` ruby
|
10
|
+
HTML = %{
|
11
|
+
<html>
|
12
|
+
<head>
|
13
|
+
<script type="text/javascript">
|
14
|
+
document.addEventListener("DOMContentLoaded", function() {
|
15
|
+
document.getElementById('fuga').textContent =
|
16
|
+
("!skrow tI").split("").reverse().join("");
|
17
|
+
Sunscraper.finish();
|
18
|
+
}, true);
|
19
|
+
</script>
|
20
|
+
</head>
|
21
|
+
<body>
|
22
|
+
<div id='fuga'></div>
|
23
|
+
</body>
|
24
|
+
</html>
|
25
|
+
}
|
26
|
+
|
27
|
+
Sunscraper.scrape_html(HTML).include?('It works!') # => true
|
28
|
+
```
|
27
29
|
|
28
30
|
See also [documentation][].
|
29
31
|
|
@@ -42,7 +44,7 @@ C extension*; it works by building a Qt shared library and loading it through [F
|
|
42
44
|
Runtime requirements
|
43
45
|
--------------------
|
44
46
|
|
45
|
-
On Linux
|
47
|
+
On Linux, Sunscraper requires a running X server and a valid `DISPLAY` environment
|
46
48
|
variable. Consider using [Xvfb][] on a GUI-less production server.
|
47
49
|
|
48
50
|
[Xvfb]: http://www.x.org/releases/X11R7.6/doc/man/man1/Xvfb.1.xhtml
|
@@ -50,6 +52,8 @@ variable. Consider using [Xvfb][] on a GUI-less production server.
|
|
50
52
|
Compatibility
|
51
53
|
-------------
|
52
54
|
|
55
|
+
![Travis CI](https://secure.travis-ci.org/roundlake/sunscraper.png)
|
56
|
+
|
53
57
|
Sunscraper should be compatible with all major Ruby implementations on all major operating systems, including
|
54
58
|
Ruby MRI 1.9, JRuby, Rubinius and MacRuby running on GNU/Linux, OS X and Windows.
|
55
59
|
|
@@ -65,4 +69,22 @@ Sunscraper is thread-safe.
|
|
65
69
|
License
|
66
70
|
-------
|
67
71
|
|
68
|
-
|
72
|
+
Copyright (C) 2011 by Peter Zotov <p.zotov@roundlake.ru>.
|
73
|
+
|
74
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
75
|
+
of this software and associated documentation files (the "Software"), to deal
|
76
|
+
in the Software without restriction, including without limitation the rights
|
77
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
78
|
+
copies of the Software, and to permit persons to whom the Software is
|
79
|
+
furnished to do so, subject to the following conditions:
|
80
|
+
|
81
|
+
The above copyright notice and this permission notice shall be included in
|
82
|
+
all copies or substantial portions of the Software.
|
83
|
+
|
84
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
85
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
86
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
87
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
88
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
89
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
90
|
+
THE SOFTWARE.
|
data/ext/.gitignore
CHANGED
@@ -0,0 +1,92 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QThread>
|
3
|
+
#include <QTimer>
|
4
|
+
#include <QWebPage>
|
5
|
+
#include <QWebFrame>
|
6
|
+
#include <QMutexLocker>
|
7
|
+
#include <QEventLoop>
|
8
|
+
#include <QtDebug>
|
9
|
+
#include "sunscraper.h"
|
10
|
+
#include "sunscraperlibrary.h"
|
11
|
+
#include "sunscraperthread.h"
|
12
|
+
|
13
|
+
unsigned Sunscraper::m_nextQueryId = 1;
|
14
|
+
QMutex Sunscraper::m_staticMutex;
|
15
|
+
|
16
|
+
/*
 * Reserves a unique query id, connects this object to the shared
 * SunscraperThread worker and creates the event loop that wait() runs.
 *
 * All connections are queued, so requests and replies are delivered through
 * the receiver's event loop rather than invoked directly.
 */
Sunscraper::Sunscraper()
{
    /* Serialises access to m_nextQueryId; held until the constructor returns. */
    QMutexLocker locker(&m_staticMutex);

    m_queryId = m_nextQueryId++;

    SunscraperThread *worker = SunscraperThread::instance();
    if(worker == NULL)
        qFatal("Attempt to run Sunscraper before thread initialization");

    /* Requests: this object -> worker. */
    connect(this, SIGNAL(requestLoadHtml(uint,QString)),
            worker, SLOT(loadHtml(uint,QString)), Qt::QueuedConnection);
    connect(this, SIGNAL(requestLoadUrl(uint,QString)),
            worker, SLOT(loadUrl(uint,QString)), Qt::QueuedConnection);
    connect(this, SIGNAL(requestFinalize(uint)),
            worker, SLOT(finalize(uint)), Qt::QueuedConnection);
    connect(this, SIGNAL(requestTimeout(uint,uint)),
            worker, SLOT(setTimeout(uint, uint)), Qt::QueuedConnection);

    /* Replies: worker -> this object. */
    connect(worker, SIGNAL(finished(uint,QString)),
            this, SLOT(finished(uint,QString)), Qt::QueuedConnection);
    connect(worker, SIGNAL(timeout(uint)),
            this, SLOT(timeout(uint)), Qt::QueuedConnection);

    /* Parent the loop to this object so that it is destroyed together with
     * it; nothing in this file deletes the loop explicitly. */
    m_eventLoop = new QEventLoop(this);
}
|
42
|
+
|
43
|
+
/*
 * Asks the worker to render the given HTML for this object's query.
 * Only emits the request signal; the result arrives via finished()/timeout().
 */
void Sunscraper::loadHtml(QString html)
{
    const unsigned query = m_queryId;

    emit requestLoadHtml(query, html);
}
|
47
|
+
|
48
|
+
/*
 * Asks the worker to load the given URL for this object's query.
 * Only emits the request signal; the result arrives via finished()/timeout().
 */
void Sunscraper::loadUrl(QString url)
{
    const unsigned query = m_queryId;

    emit requestLoadUrl(query, url);
}
|
52
|
+
|
53
|
+
void Sunscraper::wait(unsigned timeout)
|
54
|
+
{
|
55
|
+
emit requestTimeout(m_queryId, timeout);
|
56
|
+
|
57
|
+
m_eventLoop->exec();
|
58
|
+
}
|
59
|
+
|
60
|
+
void Sunscraper::finished(unsigned eventQueryId, QString html)
|
61
|
+
{
|
62
|
+
if(eventQueryId != m_queryId)
|
63
|
+
return;
|
64
|
+
|
65
|
+
m_eventLoop->quit();
|
66
|
+
|
67
|
+
m_html = html.toUtf8();
|
68
|
+
|
69
|
+
emit requestFinalize(m_queryId);
|
70
|
+
}
|
71
|
+
|
72
|
+
void Sunscraper::timeout(unsigned eventQueryId)
|
73
|
+
{
|
74
|
+
if(eventQueryId != m_queryId)
|
75
|
+
return;
|
76
|
+
|
77
|
+
m_eventLoop->quit();
|
78
|
+
|
79
|
+
m_html = "!SUNSCRAPER_TIMEOUT";
|
80
|
+
|
81
|
+
emit requestFinalize(m_queryId);
|
82
|
+
}
|
83
|
+
|
84
|
+
/*
 * Returns a copy of the stored result: the UTF-8 page set by finished() or
 * the marker set by timeout().
 */
QByteArray Sunscraper::fetch()
{
    QByteArray result(m_html);
    return result;
}
|
88
|
+
|
89
|
+
const char *Sunscraper::fetchAsCString()
|
90
|
+
{
|
91
|
+
return m_html.constData();
|
92
|
+
}
|
@@ -5,9 +5,9 @@
|
|
5
5
|
#include <QString>
|
6
6
|
#include <QMutex>
|
7
7
|
#include <QByteArray>
|
8
|
-
#include <QEventLoop>
|
9
8
|
|
10
9
|
class QWebPage;
|
10
|
+
class QEventLoop;
|
11
11
|
|
12
12
|
class Sunscraper : public QObject
|
13
13
|
{
|
@@ -26,20 +26,22 @@ public:
|
|
26
26
|
|
27
27
|
private slots:
|
28
28
|
void finished(unsigned queryId, QString html);
|
29
|
-
void timeout();
|
29
|
+
void timeout(unsigned queryId);
|
30
30
|
|
31
31
|
signals:
|
32
32
|
void requestLoadHtml(unsigned queryId, QString html);
|
33
33
|
void requestLoadUrl(unsigned queryId, QString html);
|
34
|
+
void requestTimeout(unsigned queryId, unsigned timeout);
|
34
35
|
void requestFinalize(unsigned queryId);
|
35
36
|
|
36
37
|
private:
|
37
|
-
static unsigned
|
38
|
-
static QMutex
|
38
|
+
static unsigned m_nextQueryId;
|
39
|
+
static QMutex m_staticMutex;
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
|
41
|
+
QEventLoop *m_eventLoop;
|
42
|
+
|
43
|
+
unsigned m_queryId;
|
44
|
+
QByteArray m_html;
|
43
45
|
};
|
44
46
|
|
45
47
|
#endif // SUNSCRAPER_H
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#include "sunscraper.h"
|
2
|
+
#include "sunscraperthread.h"
|
3
|
+
|
4
|
+
extern "C" {
|
5
|
+
Sunscraper *sunscraper_create()
|
6
|
+
{
|
7
|
+
return new Sunscraper();
|
8
|
+
}
|
9
|
+
|
10
|
+
void sunscraper_load_html(Sunscraper *sunscraper, const char *html)
|
11
|
+
{
|
12
|
+
sunscraper->loadHtml(html);
|
13
|
+
}
|
14
|
+
|
15
|
+
void sunscraper_load_url(Sunscraper *sunscraper, const char *url)
|
16
|
+
{
|
17
|
+
sunscraper->loadUrl(url);
|
18
|
+
}
|
19
|
+
|
20
|
+
void sunscraper_wait(Sunscraper *sunscraper, unsigned timeout)
|
21
|
+
{
|
22
|
+
sunscraper->wait(timeout);
|
23
|
+
}
|
24
|
+
|
25
|
+
const char *sunscraper_fetch(Sunscraper *sunscraper)
|
26
|
+
{
|
27
|
+
return sunscraper->fetchAsCString();
|
28
|
+
}
|
29
|
+
|
30
|
+
void sunscraper_discard(Sunscraper *sunscraper)
|
31
|
+
{
|
32
|
+
delete sunscraper;
|
33
|
+
}
|
34
|
+
|
35
|
+
void sunscraper_finalize()
|
36
|
+
{
|
37
|
+
SunscraperThread::commitSuicide();
|
38
|
+
}
|
39
|
+
}
|
@@ -1,12 +1,12 @@
|
|
1
1
|
#include "sunscraperlibrary.h"
|
2
2
|
#include "sunscraperthread.h"
|
3
|
+
#include <QtDebug>
|
3
4
|
|
4
|
-
SunscraperLibrary SunscraperLibrary::
|
5
|
+
SunscraperLibrary SunscraperLibrary::m_instance;
|
5
6
|
|
6
7
|
SunscraperLibrary::SunscraperLibrary()
|
7
8
|
{
|
8
|
-
|
9
|
-
_apartmentThread->start();
|
9
|
+
SunscraperThread::invoke();
|
10
10
|
}
|
11
11
|
|
12
12
|
SunscraperLibrary::~SunscraperLibrary()
|
@@ -16,10 +16,5 @@ SunscraperLibrary::~SunscraperLibrary()
|
|
16
16
|
|
17
17
|
SunscraperLibrary *SunscraperLibrary::instance()
|
18
18
|
{
|
19
|
-
return &
|
20
|
-
}
|
21
|
-
|
22
|
-
SunscraperThread *SunscraperLibrary::thread()
|
23
|
-
{
|
24
|
-
return _apartmentThread;
|
19
|
+
return &m_instance;
|
25
20
|
}
|
@@ -7,16 +7,12 @@ class SunscraperLibrary {
|
|
7
7
|
public:
|
8
8
|
static SunscraperLibrary *instance();
|
9
9
|
|
10
|
-
SunscraperThread *thread();
|
11
|
-
|
12
10
|
private:
|
13
11
|
SunscraperLibrary();
|
14
12
|
SunscraperLibrary(SunscraperLibrary &);
|
15
13
|
~SunscraperLibrary();
|
16
14
|
|
17
|
-
static SunscraperLibrary
|
18
|
-
|
19
|
-
SunscraperThread *_apartmentThread;
|
15
|
+
static SunscraperLibrary m_instance;
|
20
16
|
};
|
21
17
|
|
22
18
|
#endif // SUNSCRAPER_H
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#include <QWebPage>
|
2
|
+
#include <QWebFrame>
|
3
|
+
#include <QtDebug>
|
4
|
+
#include "sunscraperproxy.h"
|
5
|
+
|
6
|
+
/*
 * Creates a proxy owned by (parented to) the given page and remembers the
 * page and the query id reported by finish().
 */
SunscraperProxy::SunscraperProxy(QWebPage *parent, unsigned queryId)
    : QObject(parent),
      m_webPage(parent),
      m_queryId(queryId)
{
}
|
10
|
+
|
11
|
+
void SunscraperProxy::finish()
|
12
|
+
{
|
13
|
+
emit finished(m_queryId, m_webPage->mainFrame()->toHtml());
|
14
|
+
}
|
@@ -14,11 +14,11 @@ public:
|
|
14
14
|
Q_INVOKABLE void finish();
|
15
15
|
|
16
16
|
signals:
|
17
|
-
void finished(unsigned
|
17
|
+
void finished(unsigned queryId, QString html);
|
18
18
|
|
19
19
|
private:
|
20
|
-
QWebPage *
|
21
|
-
unsigned
|
20
|
+
QWebPage *m_webPage;
|
21
|
+
unsigned m_queryId;
|
22
22
|
};
|
23
23
|
|
24
24
|
#endif // SUNSCRAPERPROXY_H
|
@@ -0,0 +1,148 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QWebPage>
|
3
|
+
#include <QWebFrame>
|
4
|
+
#include <QTimer>
|
5
|
+
#include "sunscraperthread.h"
|
6
|
+
#include "sunscraperproxy.h"
|
7
|
+
#include <QtDebug>
|
8
|
+
#include <time.h>
|
9
|
+
|
10
|
+
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
11
|
+
pthread_t SunscraperThread::m_thread;
|
12
|
+
#endif
|
13
|
+
|
14
|
+
SunscraperThread *SunscraperThread::m_instance;
|
15
|
+
QSemaphore SunscraperThread::m_initializationLock;
|
16
|
+
|
17
|
+
/* Nothing to initialise here; the page and timer maps start out empty. */
SunscraperThread::SunscraperThread()
{
}
|
20
|
+
|
21
|
+
/*
 * Returns the worker object.
 *
 * Taking and immediately returning one semaphore resource makes the caller
 * wait until thread_routine() has released it, which happens right after
 * m_instance is assigned; later callers pass through without blocking.
 */
SunscraperThread *SunscraperThread::instance()
{
    m_initializationLock.acquire(1);
    m_initializationLock.release(1);

    SunscraperThread *worker = m_instance;
    return worker;
}
|
28
|
+
|
29
|
+
/*
 * Starts the thread that runs thread_routine().
 *
 * A failed pthread_create() is fatal: without the thread nobody releases
 * m_initializationLock and every call to instance() would block forever.
 */
void SunscraperThread::invoke()
{
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
    const int status = pthread_create(&m_thread, NULL, &SunscraperThread::thread_routine, NULL);
    if(status != 0)
        qFatal("Unable to start Sunscraper thread (pthread_create returned %d)", status);
#endif
}
|
35
|
+
|
36
|
+
void *SunscraperThread::thread_routine(void *)
|
37
|
+
{
|
38
|
+
/* Better error messages. */
|
39
|
+
int argc = 1;
|
40
|
+
char *argv[] = { (char*) "Sunscraper", NULL};
|
41
|
+
|
42
|
+
/* Why (char*)? Because argv can (theoretically) be modified. *
|
43
|
+
* But Qt won't do that with argv[0]. I know, trust me. */
|
44
|
+
|
45
|
+
//qDebug() << "a";
|
46
|
+
//usleep(1000000);
|
47
|
+
//qDebug() << "b";
|
48
|
+
|
49
|
+
QApplication app(argc, argv);
|
50
|
+
|
51
|
+
if(m_instance != NULL)
|
52
|
+
qFatal("Attempt to invoke SunscraperThread more than once");
|
53
|
+
|
54
|
+
m_instance = new SunscraperThread();
|
55
|
+
m_initializationLock.release(1);
|
56
|
+
|
57
|
+
/* The magic value 42 means we want exit from the loop. */
|
58
|
+
while(app.exec() != 42);
|
59
|
+
|
60
|
+
/* Our application exits. */
|
61
|
+
|
62
|
+
return NULL;
|
63
|
+
}
|
64
|
+
|
65
|
+
void SunscraperThread::commitSuicide()
|
66
|
+
{
|
67
|
+
QApplication::exit(42);
|
68
|
+
|
69
|
+
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
70
|
+
pthread_join(m_thread, NULL);
|
71
|
+
#endif
|
72
|
+
}
|
73
|
+
|
74
|
+
void SunscraperThread::loadHtml(unsigned queryId, QString html)
|
75
|
+
{
|
76
|
+
QWebPage *webPage = initializeWebPage(queryId);
|
77
|
+
webPage->mainFrame()->setHtml(html);
|
78
|
+
}
|
79
|
+
|
80
|
+
void SunscraperThread::loadUrl(unsigned queryId, QString url)
|
81
|
+
{
|
82
|
+
QWebPage *webPage = initializeWebPage(queryId);
|
83
|
+
webPage->mainFrame()->load(url);
|
84
|
+
}
|
85
|
+
|
86
|
+
void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
|
87
|
+
{
|
88
|
+
Q_ASSERT(m_timers[queryId] == NULL);
|
89
|
+
|
90
|
+
QTimer *timer = new QTimer(this);
|
91
|
+
timer->setInterval(timeout);
|
92
|
+
timer->setSingleShot(true);
|
93
|
+
|
94
|
+
connect(timer, SIGNAL(timeout()), this, SLOT(routeTimeout()));
|
95
|
+
|
96
|
+
timer->start();
|
97
|
+
m_timers[queryId] = timer;
|
98
|
+
}
|
99
|
+
|
100
|
+
void SunscraperThread::finalize(unsigned queryId)
|
101
|
+
{
|
102
|
+
Q_ASSERT(m_webPages[queryId] != NULL);
|
103
|
+
|
104
|
+
m_webPages[queryId]->deleteLater();
|
105
|
+
m_webPages.remove(queryId);
|
106
|
+
|
107
|
+
if(m_timers.contains(queryId)) {
|
108
|
+
m_timers[queryId]->deleteLater();
|
109
|
+
m_timers.remove(queryId);
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
/*
 * Creates the QWebPage for a query, arranges for attachAPI() to run whenever
 * the main frame's JavaScript window object is cleared, and registers the
 * page under the query id.
 *
 * Returns the new page; it is parented to this object.
 */
QWebPage *SunscraperThread::initializeWebPage(unsigned queryId)
{
    /* value() is used instead of operator[] so that the check itself does
     * not insert an entry into the map. */
    QWebPage *previous = m_webPages.value(queryId);
    Q_ASSERT(previous == NULL);

    /* Release builds: a second load for the same query replaces the page,
     * so dispose of the old one instead of leaving it untracked. */
    if(previous != NULL)
        previous->deleteLater();

    QWebPage *webPage = new QWebPage(this);
    connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
            this, SLOT(attachAPI()));

    m_webPages[queryId] = webPage;

    return webPage;
}
|
125
|
+
|
126
|
+
void SunscraperThread::attachAPI()
|
127
|
+
{
|
128
|
+
QWebFrame *origin = static_cast<QWebFrame *>(QObject::sender());
|
129
|
+
QWebPage *page = origin->page();
|
130
|
+
|
131
|
+
unsigned queryId = m_webPages.key(page, 0);
|
132
|
+
Q_ASSERT(queryId != 0);
|
133
|
+
|
134
|
+
SunscraperProxy *proxy = new SunscraperProxy(page, queryId);
|
135
|
+
connect(proxy, SIGNAL(finished(uint,QString)), this, SIGNAL(finished(uint,QString)));
|
136
|
+
|
137
|
+
origin->addToJavaScriptWindowObject("Sunscraper", proxy, QScriptEngine::QtOwnership);
|
138
|
+
}
|
139
|
+
|
140
|
+
void SunscraperThread::routeTimeout()
|
141
|
+
{
|
142
|
+
QTimer *origin = static_cast<QTimer *>(QObject::sender());
|
143
|
+
|
144
|
+
unsigned queryId = m_timers.key(origin, 0);
|
145
|
+
Q_ASSERT(queryId != 0);
|
146
|
+
|
147
|
+
emit timeout(queryId);
|
148
|
+
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#ifndef SUNSCRAPERTHREAD_H
|
2
|
+
#define SUNSCRAPERTHREAD_H
|
3
|
+
|
4
|
+
#include <QObject>
|
5
|
+
#include <QSemaphore>
|
6
|
+
#include <QMap>
|
7
|
+
|
8
|
+
class QWebPage;
|
9
|
+
class QTimer;
|
10
|
+
|
11
|
+
class SunscraperThread : public QObject
|
12
|
+
{
|
13
|
+
Q_OBJECT
|
14
|
+
public:
|
15
|
+
static void invoke();
|
16
|
+
static void commitSuicide();
|
17
|
+
static SunscraperThread *instance();
|
18
|
+
|
19
|
+
signals:
|
20
|
+
void finished(unsigned queryId, QString result);
|
21
|
+
void timeout(unsigned queryId);
|
22
|
+
|
23
|
+
public slots:
|
24
|
+
void loadHtml(unsigned queryId, QString html);
|
25
|
+
void loadUrl(unsigned queryId, QString url);
|
26
|
+
void setTimeout(unsigned queryId, unsigned timeout);
|
27
|
+
void finalize(unsigned queryId);
|
28
|
+
|
29
|
+
private slots:
|
30
|
+
void attachAPI();
|
31
|
+
void routeTimeout();
|
32
|
+
|
33
|
+
private:
|
34
|
+
static SunscraperThread *m_instance;
|
35
|
+
static QSemaphore m_initializationLock;
|
36
|
+
|
37
|
+
#if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
|
38
|
+
static pthread_t m_thread;
|
39
|
+
#else
|
40
|
+
#error Your platform is unsupported. Implement SunscraperThread::invoke() and send a pull request.
|
41
|
+
#endif
|
42
|
+
|
43
|
+
static void *thread_routine(void *arg);
|
44
|
+
|
45
|
+
QMap<unsigned, QWebPage *> m_webPages;
|
46
|
+
QMap<unsigned, QTimer *> m_timers;
|
47
|
+
|
48
|
+
SunscraperThread();
|
49
|
+
SunscraperThread(SunscraperThread &);
|
50
|
+
|
51
|
+
QWebPage *initializeWebPage(unsigned queryId);
|
52
|
+
};
|
53
|
+
|
54
|
+
#endif // SUNSCRAPERTHREAD_H
|
data/ext/extconf.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
1
|
# This Makefile will get replaced by qmake.
|
2
2
|
|
3
|
+
if Gem.win_platform?
|
4
|
+
qmake = %{qmake -spec win32-g++}
|
5
|
+
elsif RUBY_PLATFORM =~ /darwin/i || RbConfig::CONFIG['target_os'] == 'darwin'
|
6
|
+
qmake = %{qmake -spec macx-g++}
|
7
|
+
else
|
8
|
+
qmake = %{qmake}
|
9
|
+
end
|
10
|
+
|
3
11
|
File.open("Makefile", "w") do |mf|
|
4
12
|
mf.puts <<-ENDM
|
5
13
|
all:
|
6
|
-
qmake
|
7
|
-
|
14
|
+
cd embed; #{qmake}
|
15
|
+
cd standalone; #{qmake}
|
16
|
+
make -C embed
|
17
|
+
make -C standalone
|
8
18
|
ENDM
|
9
|
-
end
|
19
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#include <QApplication>
|
2
|
+
#include <QStringList>
|
3
|
+
#include "sunscraperworker.h"
|
4
|
+
#include "sunscraperrpc.h"
|
5
|
+
|
6
|
+
int main(int argc, char **argv)
|
7
|
+
{
|
8
|
+
QApplication app(argc, argv);
|
9
|
+
|
10
|
+
SunscraperRPC rpc(app.arguments().at(1));
|
11
|
+
|
12
|
+
return app.exec();
|
13
|
+
}
|
@@ -3,11 +3,11 @@
|
|
3
3
|
#include "sunscraperproxy.h"
|
4
4
|
|
5
5
|
SunscraperProxy::SunscraperProxy(QWebPage *parent, unsigned queryId) :
|
6
|
-
QObject(parent),
|
6
|
+
QObject(parent), m_webPage(parent), m_queryId(queryId)
|
7
7
|
{
|
8
8
|
}
|
9
9
|
|
10
10
|
void SunscraperProxy::finish()
|
11
11
|
{
|
12
|
-
emit finished(
|
12
|
+
emit finished(m_queryId, m_webPage->mainFrame()->toHtml());
|
13
13
|
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#ifndef SUNSCRAPERPROXY_H
|
2
|
+
#define SUNSCRAPERPROXY_H
|
3
|
+
|
4
|
+
#include <QObject>
|
5
|
+
|
6
|
+
class QWebPage;
|
7
|
+
|
8
|
+
class SunscraperProxy : public QObject
|
9
|
+
{
|
10
|
+
Q_OBJECT
|
11
|
+
public:
|
12
|
+
SunscraperProxy(QWebPage *parent, unsigned queryId);
|
13
|
+
|
14
|
+
Q_INVOKABLE void finish();
|
15
|
+
|
16
|
+
signals:
|
17
|
+
void finished(unsigned queryId, QString html);
|
18
|
+
|
19
|
+
private:
|
20
|
+
QWebPage *m_webPage;
|
21
|
+
unsigned m_queryId;
|
22
|
+
};
|
23
|
+
|
24
|
+
#endif // SUNSCRAPERPROXY_H
|