sunscraper 1.1.0.beta2 → 1.1.0.beta3

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,13 +4,15 @@ TARGET = sunscraper
4
4
  TEMPLATE = lib
5
5
 
6
6
  SOURCES += sunscraperlibrary.cpp \
7
- sunscraperthread.cpp \
7
+ sunscraperworker.cpp \
8
+ sunscraperwebpage.cpp \
8
9
  sunscraperexternal.cpp \
9
10
  sunscraper.cpp \
10
11
  sunscraperproxy.cpp
11
12
 
12
13
  HEADERS += sunscraperlibrary.h \
13
- sunscraperthread.h \
14
+ sunscraperworker.h \
15
+ sunscraperwebpage.h \
14
16
  sunscraper.h \
15
17
  sunscraperproxy.h
16
18
 
@@ -8,7 +8,7 @@
8
8
  #include <QtDebug>
9
9
  #include "sunscraper.h"
10
10
  #include "sunscraperlibrary.h"
11
- #include "sunscraperthread.h"
11
+ #include "sunscraperworker.h"
12
12
 
13
13
  unsigned Sunscraper::m_nextQueryId = 1;
14
14
  QMutex Sunscraper::m_staticMutex;
@@ -19,7 +19,7 @@ Sunscraper::Sunscraper()
19
19
 
20
20
  m_queryId = m_nextQueryId++;
21
21
 
22
- SunscraperThread *worker = SunscraperThread::instance();
22
+ SunscraperWorker *worker = SunscraperWorker::instance();
23
23
  if(worker == NULL)
24
24
  qFatal("Attempt to run Sunscraper before thread initialization");
25
25
 
@@ -1,5 +1,5 @@
1
1
  #include "sunscraper.h"
2
- #include "sunscraperthread.h"
2
+ #include "sunscraperworker.h"
3
3
 
4
4
  extern "C" {
5
5
  Sunscraper *sunscraper_create()
@@ -34,6 +34,6 @@ extern "C" {
34
34
 
35
35
  void sunscraper_finalize()
36
36
  {
37
- SunscraperThread::commitSuicide();
37
+ SunscraperWorker::commitSuicide();
38
38
  }
39
39
  }
@@ -1,12 +1,12 @@
1
1
  #include "sunscraperlibrary.h"
2
- #include "sunscraperthread.h"
2
+ #include "sunscraperworker.h"
3
3
  #include <QtDebug>
4
4
 
5
5
  SunscraperLibrary SunscraperLibrary::m_instance;
6
6
 
7
7
  SunscraperLibrary::SunscraperLibrary()
8
8
  {
9
- SunscraperThread::invoke();
9
+ SunscraperWorker::invoke();
10
10
  }
11
11
 
12
12
  SunscraperLibrary::~SunscraperLibrary()
@@ -1,7 +1,7 @@
1
1
  #ifndef SUNSCRAPERLIBRARY_H
2
2
  #define SUNSCRAPERLIBRARY_H
3
3
 
4
- class SunscraperThread;
4
+ class SunscraperWorker;
5
5
 
6
6
  class SunscraperLibrary {
7
7
  public:
@@ -0,0 +1,12 @@
1
+ #include "sunscraperwebpage.h"
2
+
3
+ SunscraperWebPage::SunscraperWebPage(QObject *parent) :
4
+ QWebPage(parent)
5
+ {
6
+ }
7
+
8
+ void SunscraperWebPage::javaScriptConsoleMessage(const QString &message,
9
+ int lineNumber, const QString &sourceID)
10
+ {
11
+ emit consoleMessage(QString("%1:%2> %3").arg(sourceID).arg(lineNumber).arg(message));
12
+ }
@@ -0,0 +1,20 @@
1
+ #ifndef SUNSCRAPERWEBPAGE_H
2
+ #define SUNSCRAPERWEBPAGE_H
3
+
4
+ #include <QWebPage>
5
+
6
+ class SunscraperWebPage : public QWebPage
7
+ {
8
+ Q_OBJECT
9
+
10
+ public:
11
+ SunscraperWebPage(QObject *parent = 0);
12
+
13
+ signals:
14
+ void consoleMessage(QString message);
15
+
16
+ protected:
17
+ virtual void javaScriptConsoleMessage(const QString & message, int lineNumber, const QString & sourceID);
18
+ };
19
+
20
+ #endif /* SUNSCRAPERWEBPAGE_H */
@@ -2,23 +2,25 @@
2
2
  #include <QWebPage>
3
3
  #include <QWebFrame>
4
4
  #include <QTimer>
5
- #include "sunscraperthread.h"
5
+ #include <QWebView>
6
+ #include "sunscraperworker.h"
7
+ #include "sunscraperwebpage.h"
6
8
  #include "sunscraperproxy.h"
7
9
  #include <QtDebug>
8
10
  #include <time.h>
9
11
 
10
12
  #if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
11
- pthread_t SunscraperThread::m_thread;
13
+ pthread_t SunscraperWorker::m_thread;
12
14
  #endif
13
15
 
14
- SunscraperThread *SunscraperThread::m_instance;
15
- QSemaphore SunscraperThread::m_initializationLock;
16
+ SunscraperWorker *SunscraperWorker::m_instance;
17
+ QSemaphore SunscraperWorker::m_initializationLock;
16
18
 
17
- SunscraperThread::SunscraperThread()
19
+ SunscraperWorker::SunscraperWorker()
18
20
  {
19
21
  }
20
22
 
21
- SunscraperThread *SunscraperThread::instance()
23
+ SunscraperWorker *SunscraperWorker::instance()
22
24
  {
23
25
  m_initializationLock.acquire(1);
24
26
  m_initializationLock.release(1);
@@ -26,14 +28,14 @@ SunscraperThread *SunscraperThread::instance()
26
28
  return m_instance;
27
29
  }
28
30
 
29
- void SunscraperThread::invoke()
31
+ void SunscraperWorker::invoke()
30
32
  {
31
33
  #if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
32
- pthread_create(&m_thread, NULL, &SunscraperThread::thread_routine, NULL);
34
+ pthread_create(&m_thread, NULL, &SunscraperWorker::thread_routine, NULL);
33
35
  #endif
34
36
  }
35
37
 
36
- void *SunscraperThread::thread_routine(void *)
38
+ void *SunscraperWorker::thread_routine(void *)
37
39
  {
38
40
  /* Better error messages. */
39
41
  int argc = 1;
@@ -49,9 +51,9 @@ void *SunscraperThread::thread_routine(void *)
49
51
  QApplication app(argc, argv);
50
52
 
51
53
  if(m_instance != NULL)
52
- qFatal("Attempt to invoke SunscraperThread more than once");
54
+ qFatal("Attempt to invoke SunscraperWorker more than once");
53
55
 
54
- m_instance = new SunscraperThread();
56
+ m_instance = new SunscraperWorker();
55
57
  m_initializationLock.release(1);
56
58
 
57
59
  /* The magic value 42 means we want exit from the loop. */
@@ -62,7 +64,7 @@ void *SunscraperThread::thread_routine(void *)
62
64
  return NULL;
63
65
  }
64
66
 
65
- void SunscraperThread::commitSuicide()
67
+ void SunscraperWorker::commitSuicide()
66
68
  {
67
69
  QApplication::exit(42);
68
70
 
@@ -71,19 +73,19 @@ void SunscraperThread::commitSuicide()
71
73
  #endif
72
74
  }
73
75
 
74
- void SunscraperThread::loadHtml(unsigned queryId, QString html)
76
+ void SunscraperWorker::loadHtml(unsigned queryId, QString html)
75
77
  {
76
78
  QWebPage *webPage = initializeWebPage(queryId);
77
79
  webPage->mainFrame()->setHtml(html);
78
80
  }
79
81
 
80
- void SunscraperThread::loadUrl(unsigned queryId, QString url)
82
+ void SunscraperWorker::loadUrl(unsigned queryId, QString url)
81
83
  {
82
84
  QWebPage *webPage = initializeWebPage(queryId);
83
85
  webPage->mainFrame()->load(url);
84
86
  }
85
87
 
86
- void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
88
+ void SunscraperWorker::setTimeout(unsigned queryId, unsigned timeout)
87
89
  {
88
90
  Q_ASSERT(m_timers[queryId] == NULL);
89
91
 
@@ -97,7 +99,7 @@ void SunscraperThread::setTimeout(unsigned queryId, unsigned timeout)
97
99
  m_timers[queryId] = timer;
98
100
  }
99
101
 
100
- void SunscraperThread::finalize(unsigned queryId)
102
+ void SunscraperWorker::finalize(unsigned queryId)
101
103
  {
102
104
  Q_ASSERT(m_webPages[queryId] != NULL);
103
105
 
@@ -110,20 +112,28 @@ void SunscraperThread::finalize(unsigned queryId)
110
112
  }
111
113
  }
112
114
 
113
- QWebPage *SunscraperThread::initializeWebPage(unsigned queryId)
115
+ QWebPage *SunscraperWorker::initializeWebPage(unsigned queryId)
114
116
  {
115
117
  Q_ASSERT(m_webPages[queryId] == NULL);
116
118
 
117
- QWebPage *webPage = new QWebPage(this);
118
- connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
119
- this, SLOT(attachAPI()));
119
+ SunscraperWebPage *webPage = new SunscraperWebPage(this);
120
+ webPage->settings()->setAttribute(QWebSettings::LocalStorageEnabled, true);
121
+
122
+ connect(webPage, SIGNAL(frameCreated(QWebFrame*)), this, SLOT(attachFrame(QWebFrame*)));
123
+ connect(webPage, SIGNAL(consoleMessage(QString)), this, SLOT(routeMessage(QString)));
120
124
 
121
125
  m_webPages[queryId] = webPage;
122
126
 
123
127
  return webPage;
124
128
  }
125
129
 
126
- void SunscraperThread::attachAPI()
130
+ void SunscraperWorker::attachFrame(QWebFrame *frame)
131
+ {
132
+ connect(frame, SIGNAL(javaScriptWindowObjectCleared()),
133
+ this, SLOT(attachAPI()));
134
+ }
135
+
136
+ void SunscraperWorker::attachAPI()
127
137
  {
128
138
  QWebFrame *origin = static_cast<QWebFrame *>(QObject::sender());
129
139
  QWebPage *page = origin->page();
@@ -137,7 +147,7 @@ void SunscraperThread::attachAPI()
137
147
  origin->addToJavaScriptWindowObject("Sunscraper", proxy, QScriptEngine::QtOwnership);
138
148
  }
139
149
 
140
- void SunscraperThread::routeTimeout()
150
+ void SunscraperWorker::routeTimeout()
141
151
  {
142
152
  QTimer *origin = static_cast<QTimer *>(QObject::sender());
143
153
 
@@ -146,3 +156,8 @@ void SunscraperThread::routeTimeout()
146
156
 
147
157
  emit timeout(queryId);
148
158
  }
159
+
160
+ void SunscraperWorker::routeMessage(QString message)
161
+ {
162
+ qDebug() << "Sunscraper Console:" << message;
163
+ }
@@ -1,20 +1,22 @@
1
- #ifndef SUNSCRAPERTHREAD_H
2
- #define SUNSCRAPERTHREAD_H
1
+ #ifndef SUNSCRAPERWORKER_H
2
+ #define SUNSCRAPERWORKER_H
3
3
 
4
4
  #include <QObject>
5
5
  #include <QSemaphore>
6
6
  #include <QMap>
7
+ #include <QUrl>
7
8
 
8
9
  class QWebPage;
10
+ class QWebFrame;
9
11
  class QTimer;
10
12
 
11
- class SunscraperThread : public QObject
13
+ class SunscraperWorker : public QObject
12
14
  {
13
15
  Q_OBJECT
14
16
  public:
15
17
  static void invoke();
16
18
  static void commitSuicide();
17
- static SunscraperThread *instance();
19
+ static SunscraperWorker *instance();
18
20
 
19
21
  signals:
20
22
  void finished(unsigned queryId, QString result);
@@ -27,17 +29,19 @@ public slots:
27
29
  void finalize(unsigned queryId);
28
30
 
29
31
  private slots:
32
+ void attachFrame(QWebFrame *frame);
30
33
  void attachAPI();
31
34
  void routeTimeout();
35
+ void routeMessage(QString message);
32
36
 
33
37
  private:
34
- static SunscraperThread *m_instance;
38
+ static SunscraperWorker *m_instance;
35
39
  static QSemaphore m_initializationLock;
36
40
 
37
41
  #if defined(Q_OS_LINUX) || defined(Q_OS_UNIX)
38
42
  static pthread_t m_thread;
39
43
  #else
40
- #error Your platform is unsupported. Implement SunscraperThread::invoke() and send a pull request.
44
+ #error Your platform is unsupported. Implement SunscraperWorker::invoke() and send a pull request.
41
45
  #endif
42
46
 
43
47
  static void *thread_routine(void *arg);
@@ -45,10 +49,10 @@ private:
45
49
  QMap<unsigned, QWebPage *> m_webPages;
46
50
  QMap<unsigned, QTimer *> m_timers;
47
51
 
48
- SunscraperThread();
49
- SunscraperThread(SunscraperThread &);
52
+ SunscraperWorker();
53
+ SunscraperWorker(SunscraperWorker &);
50
54
 
51
55
  QWebPage *initializeWebPage(unsigned queryId);
52
56
  };
53
57
 
54
- #endif // SUNSCRAPERTHREAD_H
58
+ #endif // SUNSCRAPERWORKER_H
@@ -1,22 +1,31 @@
1
1
  # This Makefile will get replaced by qmake.
2
2
 
3
- if Gem.win_platform?
4
- qmake = %{qmake -spec win32-g++}
5
- elsif RUBY_PLATFORM =~ /darwin/i || RbConfig::CONFIG['target_os'] == 'darwin'
3
+ if RUBY_PLATFORM =~ /darwin/i || RbConfig::CONFIG['target_os'] == 'darwin'
4
+ # Cannot you OS X have a build system like all sane people?
5
+ # Win32 wins again.
6
6
  qmake = %{qmake -spec macx-g++}
7
- else
8
- qmake = %{qmake}
9
- end
10
7
 
11
- File.open("Makefile", "w") do |mf|
12
- mf.puts <<-ENDM
8
+ File.open("Makefile", "w") do |mf|
9
+ mf.puts <<-ENDM
13
10
  all:
14
- cd embed; #{qmake}
15
- cd standalone; #{qmake}
16
- make -C embed
17
- make -C standalone
18
-
11
+ (cd embed && #{qmake}; make)
12
+ (cd standalone && #{qmake}; make)
19
13
  install:
20
14
  # do nothing
21
- ENDM
22
- end
15
+ ENDM
16
+ end
17
+ else
18
+ if Gem.win_platform?
19
+ qmake = %{qmake -spec win32-g++}
20
+ else
21
+ qmake = %{qmake}
22
+ end
23
+
24
+ File.open("Makefile", "w") do |mf|
25
+ mf.puts <<-ENDM
26
+ all:
27
+ #{qmake}
28
+ make
29
+ ENDM
30
+ end
31
+ end
@@ -1,5 +1,6 @@
1
1
  #include <QWebPage>
2
2
  #include <QWebFrame>
3
+ #include <QtDebug>
3
4
  #include "sunscraperproxy.h"
4
5
 
5
6
  SunscraperProxy::SunscraperProxy(QWebPage *parent, unsigned queryId) :
@@ -35,6 +35,8 @@ QWebPage *SunscraperWorker::initializeWebPage(unsigned queryId)
35
35
  Q_ASSERT(_webPages[queryId] == NULL);
36
36
 
37
37
  QWebPage *webPage = new QWebPage(this);
38
+ webPage->settings()->setAttribute(QWebSettings::LocalStorageEnabled, true);
39
+
38
40
  connect(webPage->mainFrame(), SIGNAL(javaScriptWindowObjectCleared()),
39
41
  this, SLOT(attachAPI()));
40
42
 
@@ -0,0 +1,2 @@
1
+ TEMPLATE = subdirs
2
+ SUBDIRS = embed standalone
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "sunscraper"
6
- s.version = "1.1.0.beta2"
6
+ s.version = "1.1.0.beta3"
7
7
  s.authors = ["Peter Zotov"]
8
8
  s.email = ["whitequark@whitequark.org"]
9
9
  s.homepage = "http://github.com/whitequark/sunscraper"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sunscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0.beta2
4
+ version: 1.1.0.beta3
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-09 00:00:00.000000000 Z
12
+ date: 2012-03-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &76880130 !ruby/object:Gem::Requirement
16
+ requirement: &81880600 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *76880130
24
+ version_requirements: *81880600
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: ffi
27
- requirement: &76879880 !ruby/object:Gem::Requirement
27
+ requirement: &81880200 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: 1.0.11
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *76879880
35
+ version_requirements: *81880200
36
36
  description: A WebKit-based, JavaScript-capable HTML scraper.
37
37
  email:
38
38
  - whitequark@whitequark.org
@@ -50,18 +50,20 @@ files:
50
50
  - README.md
51
51
  - Rakefile
52
52
  - ext/.gitignore
53
+ - ext/embed/embed.pro
53
54
  - ext/embed/sunscraper.cpp
54
55
  - ext/embed/sunscraper.h
55
- - ext/embed/sunscraper.pro
56
56
  - ext/embed/sunscraperexternal.cpp
57
57
  - ext/embed/sunscraperlibrary.cpp
58
58
  - ext/embed/sunscraperlibrary.h
59
59
  - ext/embed/sunscraperproxy.cpp
60
60
  - ext/embed/sunscraperproxy.h
61
- - ext/embed/sunscraperthread.cpp
62
- - ext/embed/sunscraperthread.h
61
+ - ext/embed/sunscraperwebpage.cpp
62
+ - ext/embed/sunscraperwebpage.h
63
+ - ext/embed/sunscraperworker.cpp
64
+ - ext/embed/sunscraperworker.h
63
65
  - ext/extconf.rb
64
- - ext/standalone/sunscraper.pro
66
+ - ext/standalone/standalone.pro
65
67
  - ext/standalone/sunscrapermain.cpp
66
68
  - ext/standalone/sunscraperproxy.cpp
67
69
  - ext/standalone/sunscraperproxy.h
@@ -69,6 +71,7 @@ files:
69
71
  - ext/standalone/sunscraperrpc.h
70
72
  - ext/standalone/sunscraperworker.cpp
71
73
  - ext/standalone/sunscraperworker.h
74
+ - ext/sunscraper-ext.pro
72
75
  - lib/sunscraper.rb
73
76
  - lib/sunscraper/library.rb
74
77
  - lib/sunscraper/standalone.rb
@@ -100,4 +103,3 @@ signing_key:
100
103
  specification_version: 3
101
104
  summary: A WebKit-based, JavaScript-capable HTML scraper.
102
105
  test_files: []
103
- has_rdoc: