sunscraper 1.1.0.beta2 → 1.1.0.beta3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,13 +4,15 @@ TARGET = sunscraper
4
4
  TEMPLATE = lib
5
5
 
6
6
  SOURCES += sunscraperlibrary.cpp \
7
- sunscraperthread.cpp \
7
+ sunscraperworker.cpp \
8
+ sunscraperwebpage.cpp \
8
9
  sunscraperexternal.cpp \
9
10
  sunscraper.cpp \
10
11
  sunscraperproxy.cpp
11
12
 
12
13
  HEADERS += sunscraperlibrary.h \
13
- sunscraperthread.h \
14
+ sunscraperworker.h \
15
+ sunscraperwebpage.h \
14
16
  sunscraper.h \
15
17
  sunscraperproxy.h
16
18
 
@@ -8,7 +8,7 @@
8
8
  #include <QtDebug>
9
9
  #include "sunscraper.h"
10
10
  #include "sunscraperlibrary.h"
11
- #include "sunscraperthread.h"
11
+ #include "sunscraperworker.h"
12
12
 
13
13
  unsigned Sunscraper::m_nextQueryId = 1;
14
14
  QMutex Sunscraper::m_staticMutex;
@@ -19,7 +19,7 @@ Sunscraper::Sunscraper()
19
19
 
20
20
  m_queryId = m_nextQueryId++;
21
21
 
22
- SunscraperThread *worker = SunscraperThread::instance();
22
+ SunscraperWorker *worker = SunscraperWorker::instance();
23
23
  if(worker == NULL)
24
24
  qFatal("Attempt to run Sunscraper before thread initialization");
25
25
 
@@ -1,5 +1,5 @@
1
1
  #include "sunscraper.h"
2
- #include "sunscraperthread.h"
2
+ #include "sunscraperworker.h"
3
3
 
4
4
  extern "C" {
5
5
  Sunscraper *sunscraper_create()
@@ -34,6 +34,6 @@ extern "C" {
34
34
 
35
35
  // Review annotation: C-linkage entry point (it sits inside the surrounding
  // extern "C" block) that takes no arguments and returns nothing. It simply
  // delegates to the static commitSuicide(), which calls QApplication::exit(42).
  // This hunk only retargets the call from SunscraperThread to SunscraperWorker.
  void sunscraper_finalize()
36
36
  {
37
- SunscraperThread::commitSuicide();
37
+ SunscraperWorker::commitSuicide();
38
38
  }
39
39
  }
@@ -1,12 +1,12 @@
1
1
  #include "sunscraperlibrary.h"
2
- #include "sunscraperthread.h"
2
+ #include "sunscraperworker.h"
3
3
  #include <QtDebug>
4
4
 
5
5
  SunscraperLibrary SunscraperLibrary::m_instance;
6
6
 
7
7
  // Review annotation: constructor of SunscraperLibrary. The only object of
  // this type visible here is the static SunscraperLibrary::m_instance defined
  // just above, so this body runs when that static object is constructed.
  // Its single action is to call the static invoke(), renamed in this hunk from
  // SunscraperThread::invoke() to SunscraperWorker::invoke().
  SunscraperLibrary::SunscraperLibrary()
8
8
  {
9
- SunscraperThread::invoke();
9
+ SunscraperWorker::invoke();
10
10
  }
11
11
 
12
12
  SunscraperLibrary::~SunscraperLibrary()
@@ -1,7 +1,7 @@
1
1
  #ifndef SUNSCRAPERLIBRARY_H
2
2
  #define SUNSCRAPERLIBRARY_H
3
3
 
4
- class SunscraperThread;
4
+ class SunscraperWorker;
5
5
 
6
6
  class SunscraperLibrary {
7
7
  public:
@@ -0,0 +1,12 @@
1
+ #include "sunscraperwebpage.h"
2
+
3
// Review annotation: constructs the page by forwarding `parent` unchanged to
// the QWebPage base-class constructor; the body itself is empty.
+ SunscraperWebPage::SunscraperWebPage(QObject *parent) :
4
+ QWebPage(parent)
5
+ {
6
+ }
7
+
8
// Review annotation: formats a JavaScript console message as
// "<sourceID>:<lineNumber>> <message>" and re-emits it through the
// consoleMessage(QString) signal. Presumably this overrides the QWebPage
// console hook of the same name — confirm against the QtWebKit headers.
//
// Parameters:
//   message    - text of the console message (substituted for %3).
//   lineNumber - line number reported for the message (substituted for %2).
//   sourceID   - identifier of the script source (substituted for %1).
//
// NOTE(review): the three values are substituted with chained arg() calls, and
// each call replaces the lowest-numbered remaining place marker. A sourceID
// that itself contains marker-like text (e.g. a URL-encoded "%2F") could have
// that text replaced by lineNumber — verify against the QString::arg docs.
+ void SunscraperWebPage::javaScriptConsoleMessage(const QString &message,
9
+ int lineNumber, const QString &sourceID)
10
+ {
11
+ emit consoleMessage(QString("%1:%2> %3").arg(sourceID).arg(lineNumber).arg(message));
12
+ }
@@ -0,0 +1,20 @@
1
+ #ifndef SUNSCRAPERWEBPAGE_H
2
+ #define SUNSCRAPERWEBPAGE_H
3
+
4
+ #include <QWebPage>
5
+
6
// Review annotation: QWebPage subclass introduced by this release.
//   * Public constructor taking an optional QObject parent (defaults to 0).
//   * Signal consoleMessage(QString) carrying one formatted message string.
//   * Protected virtual javaScriptConsoleMessage(message, lineNumber, sourceID),
//     whose definition in sunscraperwebpage.cpp emits consoleMessage().
+ class SunscraperWebPage : public QWebPage
7
+ {
8
+ Q_OBJECT
9
+
10
+ public:
11
+ SunscraperWebPage(QObject *parent = 0);
12
+
13
+ signals:
14
+ void consoleMessage(QString message);
15
+
16
+ protected:
17
+ virtual void javaScriptConsoleMessage(const QString & message, int lineNumber, const QString & sourceID);
18
+ };
19
+
20
+ #endif /* SUNSCRAPERWEBPAGE_H */
@@ -2,23 +2,25 @@
2
2
  #include <QWebPage>
3
3
  #include <QWebFrame>
4
4
  #include <QTimer>
5
- #include "sunscraperthread.h"
5
+ #include <QWebView>
6
+ #include "sunscraperworker.h"
7
+ #include "sunscraperwebpage.h"
6
8
  #include "sunscraperproxy.h"
7
9
  #include <QtDebug>
8
10
  #include <time.h>
9
11
 
10
12
  #if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
11
- pthread_t SunscraperThread::m_thread;
13
+ pthread_t SunscraperWorker::m_thread;
12
14
  #endif
13
15
 
14
- SunscraperThread *SunscraperThread::m_instance;
15
- QSemaphore SunscraperThread::m_initializationLock;
16
+ SunscraperWorker *SunscraperWorker::m_instance;
17
+ QSemaphore SunscraperWorker::m_initializationLock;
16
18
 
17
// Review annotation: default constructor with an empty body and no member
// initializer list. This hunk only renames the class from SunscraperThread to
// SunscraperWorker. The only construction site visible in this diff is the
// `new SunscraperWorker()` inside thread_routine().
- SunscraperThread::SunscraperThread()
19
+ SunscraperWorker::SunscraperWorker()
18
20
  {
19
21
  }
20
22
 
21
- SunscraperThread *SunscraperThread::instance()
23
+ SunscraperWorker *SunscraperWorker::instance()
22
24
  {
23
25
  m_initializationLock.acquire(1);
24
26
  m_initializationLock.release(1);
@@ -26,14 +28,14 @@ SunscraperThread *SunscraperThread::instance()
26
28
  return m_instance;
27
29
  }
28
30
 
29
// Review annotation: static entry point that starts the worker thread. When
// Q_OS_LINUX or Q_OS_UNIX is defined it calls pthread_create() with default
// attributes (NULL), storing the handle in the static m_thread and running
// thread_routine() with a NULL argument. Outside that #if the body is empty.
// This hunk only renames SunscraperThread to SunscraperWorker.
//
// NOTE(review): the return value of pthread_create() is ignored, so a failed
// thread start is not reported here.
- void SunscraperThread::invoke()
31
+ void SunscraperWorker::invoke()
30
32
  {
31
33
  #if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
32
- pthread_create(&m_thread, NULL, &SunscraperThread::thread_routine, NULL);
34
+ pthread_create(&m_thread, NULL, &SunscraperWorker::thread_routine, NULL);
33
35
  #endif
34
36
  }
35
37
 
36
- void *SunscraperThread::thread_routine(void *)
38
+ void *SunscraperWorker::thread_routine(void *)
37
39
  {
38
40
  /* Better error messages. */
39
41
  int argc = 1;
@@ -49,9 +51,9 @@ void *SunscraperThread::thread_routine(void *)
49
51
  QApplication app(argc, argv);
50
52
 
51
53
  if(m_instance != NULL)
52
- qFatal("Attempt to invoke SunscraperThread more than once");
54
+ qFatal("Attempt to invoke SunscraperWorker more than once");
53
55
 
54
- m_instance = new SunscraperThread();
56
+ m_instance = new SunscraperWorker();
55
57
  m_initializationLock.release(1);
56
58
 
57
59
  /* The magic value 42 means we want exit from the loop. */
@@ -62,7 +64,7 @@ void *SunscraperThread::thread_routine(void *)
62
64
  return NULL;
63
65
  }
64
66
 
65
- void SunscraperThread::commitSuicide()
67
+ void SunscraperWorker::commitSuicide()
66
68
  {
67
69
  QApplication::exit(42);
68
70
 
@@ -71,19 +73,19 @@ void SunscraperThread::commitSuicide()
71
73
  #endif
72
74
  }
73
75
 
74
// Review annotation: creates the web page for `queryId` through
// initializeWebPage() and passes `html` to setHtml() on that page's main frame.
// Only the class name in the signature changes in this hunk.
//
// Parameters:
//   queryId - key under which initializeWebPage() registers the new page.
//   html    - markup handed to the main frame.
- void SunscraperThread::loadHtml(unsigned queryId, QString html)
76
+ void SunscraperWorker::loadHtml(unsigned queryId, QString html)
75
77
  {
76
78
  QWebPage *webPage = initializeWebPage(queryId);
77
79
  webPage->mainFrame()->setHtml(html);
78
80
  }
79
81
 
80
// Review annotation: creates the web page for `queryId` through
// initializeWebPage() and passes `url` to load() on that page's main frame.
// Only the class name in the signature changes in this hunk.
//
// Parameters:
//   queryId - key under which initializeWebPage() registers the new page.
//   url     - address handed to the main frame's load() as a QString.
- void SunscraperThread::loadUrl(unsigned queryId, QString url)
81
+ void SunscraperWorker::loadUrl(unsigned queryId, QString url)
80
82
  {
81
83
  QWebPage *webPage = initializeWebPage(queryId);
82
84
  webPage->mainFrame()->load(url);
83
85
  }
85
87
 
86
- void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
88
+ void SunscraperWorker::setTimeout(unsigned queryId, unsigned timeout)
87
89
  {
88
90
  Q_ASSERT(m_timers[queryId] == NULL);
89
91
 
@@ -97,7 +99,7 @@ void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
97
99
  m_timers[queryId] = timer;
98
100
  }
99
101
 
100
- void SunscraperThread::finalize(unsigned queryId)
102
+ void SunscraperWorker::finalize(unsigned queryId)
101
103
  {
102
104
  Q_ASSERT(m_webPages[queryId] != NULL);
103
105
 
@@ -110,20 +112,28 @@ void SunscraperThread::finalize(unsigned queryId)
110
112
  }
111
113
  }
112
114
 
113
// Review annotation: allocates the page object for `queryId`, stores it in
// m_webPages[queryId] and returns it. It asserts beforehand that no page is
// registered for that id yet.
//
// Changes recorded by this hunk:
//   * The page is now a SunscraperWebPage (parented to this worker) instead of
//     a plain QWebPage.
//   * QWebSettings::LocalStorageEnabled is switched on for the page.
//   * The direct connection from the main frame's
//     javaScriptWindowObjectCleared() signal to attachAPI() is replaced by a
//     connection from the page's frameCreated(QWebFrame*) signal to
//     attachFrame(QWebFrame*), which makes that per-frame connection.
//   * The page's consoleMessage(QString) signal is routed to routeMessage().
//
// NOTE(review): API attachment for the main frame now depends on
// frameCreated() being emitted for it after these connects — confirm against
// the QWebPage documentation.
// NOTE(review): m_webPages is a QMap, so the operator[] lookup inside Q_ASSERT
// presumably inserts a default entry when the key is absent — verify.
- QWebPage *SunscraperThread::initializeWebPage(unsigned queryId)
115
+ QWebPage *SunscraperWorker::initializeWebPage(unsigned queryId)
114
116
  {
115
117
  Q_ASSERT(m_webPages[queryId] == NULL);
116
118
 
117
- QWebPage *webPage = new QWebPage(this);
118
- connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
119
- this, SLOT(attachAPI()));
119
+ SunscraperWebPage *webPage = new SunscraperWebPage(this);
120
+ webPage->settings()->setAttribute(QWebSettings::LocalStorageEnabled, true);
121
+
122
+ connect(webPage, SIGNAL(frameCreated(QWebFrame*)), this, SLOT(attachFrame(QWebFrame*)));
123
+ connect(webPage, SIGNAL(consoleMessage(QString)), this, SLOT(routeMessage(QString)));
120
124
 
121
125
  m_webPages[queryId] = webPage;
122
126
 
123
127
  return webPage;
124
128
  }
125
129
 
126
- void SunscraperThread::attachAPI()
130
// Review annotation: slot added in this release. initializeWebPage() connects
// it to each page's frameCreated(QWebFrame*) signal. For the frame it receives
// it connects that frame's javaScriptWindowObjectCleared() signal to this
// worker's attachAPI() slot.
//
// Parameters:
//   frame - the frame delivered by the frameCreated() signal.
+ void SunscraperWorker::attachFrame(QWebFrame *frame)
131
+ {
132
+ connect(frame, SIGNAL(javaScriptWindowObjectCleared()),
133
+ this, SLOT(attachAPI()));
134
+ }
135
+
136
+ void SunscraperWorker::attachAPI()
127
137
  {
128
138
  QWebFrame *origin = static_cast<QWebFrame *>(QObject::sender());
129
139
  QWebPage *page = origin->page();
@@ -137,7 +147,7 @@ void SunscraperThread::attachAPI()
137
147
  origin->addToJavaScriptWindowObject("Sunscraper", proxy, QScriptEngine::QtOwnership);
138
148
  }
139
149
 
140
- void SunscraperThread::routeTimeout()
150
+ void SunscraperWorker::routeTimeout()
141
151
  {
142
152
  QTimer *origin = static_cast<QTimer *>(QObject::sender());
143
153
 
@@ -146,3 +156,8 @@ void SunscraperThread::routeTimeout()
146
156
 
147
157
  emit timeout(queryId);
148
158
  }
159
+
160
// Review annotation: slot added in this release. initializeWebPage() connects
// it to the page's consoleMessage(QString) signal. It writes the received
// text to qDebug() behind the fixed prefix "Sunscraper Console:".
//
// Parameters:
//   message - the already formatted console line to log.
+ void SunscraperWorker::routeMessage(QString message)
161
+ {
162
+ qDebug() << "Sunscraper Console:" << message;
163
+ }
@@ -1,20 +1,22 @@
1
- #ifndef SUNSCRAPERTHREAD_H
2
- #define SUNSCRAPERTHREAD_H
1
+ #ifndef SUNSCRAPERWORKER_H
2
+ #define SUNSCRAPERWORKER_H
3
3
 
4
4
  #include <QObject>
5
5
  #include <QSemaphore>
6
6
  #include <QMap>
7
+ #include <QUrl>
7
8
 
8
9
  class QWebPage;
10
+ class QWebFrame;
9
11
  class QTimer;
10
12
 
11
- class SunscraperThread : public QObject
13
+ class SunscraperWorker : public QObject
12
14
  {
13
15
  Q_OBJECT
14
16
  public:
15
17
  static void invoke();
16
18
  static void commitSuicide();
17
- static SunscraperThread *instance();
19
+ static SunscraperWorker *instance();
18
20
 
19
21
  signals:
20
22
  void finished(unsigned queryId, QString result);
@@ -27,17 +29,19 @@ public slots:
27
29
  void finalize(unsigned queryId);
28
30
 
29
31
  private slots:
32
+ void attachFrame(QWebFrame *frame);
30
33
  void attachAPI();
31
34
  void routeTimeout();
35
+ void routeMessage(QString message);
32
36
 
33
37
  private:
34
- static SunscraperThread *m_instance;
38
+ static SunscraperWorker *m_instance;
35
39
  static QSemaphore m_initializationLock;
36
40
 
37
41
  #if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
38
42
  static pthread_t m_thread;
39
43
  #else
40
- #error Your platform is unsupported. Implement SunscraperThread::invoke() and send a pull request.
44
+ #error Your platform is unsupported. Implement SunscraperWorker::invoke() and send a pull request.
41
45
  #endif
42
46
 
43
47
  static void *thread_routine(void *arg);
@@ -45,10 +49,10 @@ private:
45
49
  QMap<unsigned, QWebPage *> m_webPages;
46
50
  QMap<unsigned, QTimer *> m_timers;
47
51
 
48
- SunscraperThread();
49
- SunscraperThread(SunscraperThread &);
52
+ SunscraperWorker();
53
+ SunscraperWorker(SunscraperWorker &);
50
54
 
51
55
  QWebPage *initializeWebPage(unsigned queryId);
52
56
  };
53
57
 
54
- #endif // SUNSCRAPERTHREAD_H
58
+ #endif // SUNSCRAPERWORKER_H
@@ -1,22 +1,31 @@
1
1
  # This Makefile will get replaced by qmake.
2
2
 
3
# Review annotation (not part of the recorded diff): extconf script that writes
# a bootstrap file named "Makefile" via File.open/mf.puts.
#
# Old version: chose a qmake command line per platform (win32-g++ spec when
# Gem.win_platform? is true, macx-g++ spec on darwin, plain qmake otherwise)
# and wrote one Makefile. Its `all` target ran qmake in embed/ and standalone/
# and then `make -C` on both; its `install` target did nothing.
#
# New version: the darwin check comes first.
#   * darwin: keeps the macx-g++ spec and a per-directory Makefile, with the
#     recipe rewritten as a subshell that changes into each directory, runs
#     qmake and then make; the do-nothing `install` target is kept.
#   * everything else: chooses the win32-g++ spec or plain qmake, then writes a
#     Makefile whose `all` target runs qmake followed by make in the current
#     directory. It has no `install` target of its own — presumably the
#     qmake-generated Makefile mentioned in the header comment replaces it.
#
# NOTE(review): in the darwin recipe, qmake and make are separated by `;`
# rather than `&&`, so make still runs when cd or qmake fails; if the cd fails
# it runs in the parent directory — confirm this is intended.
- if Gem.win_platform?
4
- qmake = %{qmake -spec win32-g++}
5
- elsif RUBY_PLATFORM =~ /darwin/i || RbConfig::CONFIG['target_os'] == 'darwin'
3
+ if RUBY_PLATFORM =~ /darwin/i || RbConfig::CONFIG['target_os'] == 'darwin'
4
+ # Cannot you OS X have a build system like all sane people?
5
+ # Win32 wins again.
6
6
  qmake = %{qmake -spec macx-g++}
7
- else
8
- qmake = %{qmake}
9
- end
10
7
 
11
- File.open("Makefile", "w") do |mf|
12
- mf.puts <<-ENDM
8
+ File.open("Makefile", "w") do |mf|
9
+ mf.puts <<-ENDM
13
10
  all:
14
- cd embed; #{qmake}
15
- cd standalone; #{qmake}
16
- make -C embed
17
- make -C standalone
18
-
11
+ (cd embed && #{qmake}; make)
12
+ (cd standalone && #{qmake}; make)
19
13
  install:
20
14
  # do nothing
21
- ENDM
22
- end
15
+ ENDM
16
+ end
17
+ else
18
+ if Gem.win_platform?
19
+ qmake = %{qmake -spec win32-g++}
20
+ else
21
+ qmake = %{qmake}
22
+ end
23
+
24
+ File.open("Makefile", "w") do |mf|
25
+ mf.puts <<-ENDM
26
+ all:
27
+ #{qmake}
28
+ make
29
+ ENDM
30
+ end
31
+ end
@@ -1,5 +1,6 @@
1
1
  #include <QWebPage>
2
2
  #include <QWebFrame>
3
+ #include <QtDebug>
3
4
  #include "sunscraperproxy.h"
4
5
 
5
6
  SunscraperProxy::SunscraperProxy(QWebPage *parent, unsigned queryId) :
@@ -35,6 +35,8 @@ QWebPage *SunscraperWorker::initializeWebPage(unsigned queryId)
35
35
  Q_ASSERT(_webPages[queryId] == NULL);
36
36
 
37
37
  QWebPage *webPage = new QWebPage(this);
38
+ webPage->settings()->setAttribute(QWebSettings::LocalStorageEnabled, true);
39
+
38
40
  connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
39
41
  this, SLOT(attachAPI()));
40
42
 
@@ -0,0 +1,2 @@
1
+ TEMPLATE = subdirs
2
+ SUBDIRS = embed standalone
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "sunscraper"
6
- s.version = "1.1.0.beta2"
6
+ s.version = "1.1.0.beta3"
7
7
  s.authors = ["Peter Zotov"]
8
8
  s.email = ["whitequark@whitequark.org"]
9
9
  s.homepage = "http://github.com/whitequark/sunscraper"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sunscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0.beta2
4
+ version: 1.1.0.beta3
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-09 00:00:00.000000000 Z
12
+ date: 2012-03-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &76880130 !ruby/object:Gem::Requirement
16
+ requirement: &81880600 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *76880130
24
+ version_requirements: *81880600
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: ffi
27
- requirement: &76879880 !ruby/object:Gem::Requirement
27
+ requirement: &81880200 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: 1.0.11
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *76879880
35
+ version_requirements: *81880200
36
36
  description: A WebKit-based, JavaScript-capable HTML scraper.
37
37
  email:
38
38
  - whitequark@whitequark.org
@@ -50,18 +50,20 @@ files:
50
50
  - README.md
51
51
  - Rakefile
52
52
  - ext/.gitignore
53
+ - ext/embed/embed.pro
53
54
  - ext/embed/sunscraper.cpp
54
55
  - ext/embed/sunscraper.h
55
- - ext/embed/sunscraper.pro
56
56
  - ext/embed/sunscraperexternal.cpp
57
57
  - ext/embed/sunscraperlibrary.cpp
58
58
  - ext/embed/sunscraperlibrary.h
59
59
  - ext/embed/sunscraperproxy.cpp
60
60
  - ext/embed/sunscraperproxy.h
61
- - ext/embed/sunscraperthread.cpp
62
- - ext/embed/sunscraperthread.h
61
+ - ext/embed/sunscraperwebpage.cpp
62
+ - ext/embed/sunscraperwebpage.h
63
+ - ext/embed/sunscraperworker.cpp
64
+ - ext/embed/sunscraperworker.h
63
65
  - ext/extconf.rb
64
- - ext/standalone/sunscraper.pro
66
+ - ext/standalone/standalone.pro
65
67
  - ext/standalone/sunscrapermain.cpp
66
68
  - ext/standalone/sunscraperproxy.cpp
67
69
  - ext/standalone/sunscraperproxy.h
@@ -69,6 +71,7 @@ files:
69
71
  - ext/standalone/sunscraperrpc.h
70
72
  - ext/standalone/sunscraperworker.cpp
71
73
  - ext/standalone/sunscraperworker.h
74
+ - ext/sunscraper-ext.pro
72
75
  - lib/sunscraper.rb
73
76
  - lib/sunscraper/library.rb
74
77
  - lib/sunscraper/standalone.rb
@@ -100,4 +103,3 @@ signing_key:
100
103
  specification_version: 3
101
104
  summary: A WebKit-based, JavaScript-capable HTML scraper.
102
105
  test_files: []
103
- has_rdoc: