vbulletin_scraper 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 805e07a163e98ce38cebf5e49964b7da1235da41
4
- data.tar.gz: 5a096f68ecb271d41651246c70788c581207629c
3
+ metadata.gz: 75834d520af865ec1aef5d413b34d39015c51ac6
4
+ data.tar.gz: 458f44485c20b301bb24cae79dcd2bbd309a2fb5
5
5
  SHA512:
6
- metadata.gz: 711a42763aeb4fe45682b91214f24928abf35ded07f489c5a807bc6fc76e71d1ae5b681745cb25255013cffb8a9dd9f0a861ddf7b110f75c5893ebbf82d67188
7
- data.tar.gz: 31425f9eda2c10843b4e7762883f5b0dcf20a15d18a496e8a11bdc454f56e539f11b844f25cb31537ad8cb74233f7bbcacaa66c5634cef3454ded474c6301a16
6
+ metadata.gz: b27b992806780f92751cdf860c2df713e9ac6cf2bde735a197f703e9bc4c873b114feac089058b5d88bd4fa9333faef2face7546f28a4a298b9810de341c2863
7
+ data.tar.gz: 20e582041beb4f28793e4cabb6e651aa92b533daad3a57b22cf6b0239ac2abc18b6c8b867ea37d4e4f86e53233944720777562251f33bd5ca9e530a89c647784
data/Rakefile CHANGED
@@ -1,2 +1,8 @@
1
- require "bundler/gem_tasks"
2
- task :default => :spec
1
+ begin
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ task :default => :spec
6
+ rescue LoadError
7
+ puts "RSpec is unavailable. Please run 'bundle install' and try again."
8
+ end
@@ -0,0 +1,43 @@
1
+ require_relative 'scraper'
2
+
3
+ module VbulletinScraper
4
+ module V4
5
+ class ForumScraper < Scraper
6
+ def is_valid_vbulletin
7
+ if get_vbulletin_version != ''
8
+ return true
9
+ else
10
+ return false
11
+ end
12
+ end
13
+
14
+ def get_vbulletin_version
15
+ vbulletinVersion = get_item_by_selector_with_attribute('meta[name="generator"]', 'content')
16
+ if vbulletinVersion != nil
17
+ return get_raw_text(vbulletinVersion.gsub('vBulletin', ''))
18
+ end
19
+ return ''
20
+ end
21
+
22
+ def get_forum_url
23
+ pageUrl = get_item_by_selector_with_attribute('base', 'href')
24
+ if pageUrl != nil
25
+ return get_raw_text(pageUrl)
26
+ end
27
+ return ''
28
+ end
29
+
30
+ def get_forum_title
31
+ forumTitle = get_item_by_selector_with_attribute('#logo img', 'alt')
32
+ if forumTitle == ''
33
+ forumTitle = get_item_by_selector_with_attribute('.logo img', 'title')
34
+ end
35
+ if forumTitle != nil
36
+ return get_raw_text(forumTitle)
37
+ else
38
+ return ''
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -1,10 +1,8 @@
1
- require 'nokogiri'
2
- require 'open-uri'
3
- require 'open_uri_redirections'
1
+ require_relative 'scraper'
4
2
 
5
3
  module VbulletinScraper
6
4
  module V4
7
- class PostScraper
5
+ class PostScraper < Scraper
8
6
  def initialize(input)
9
7
  @data = nil
10
8
  if input.start_with? "http" || "www"
@@ -45,7 +43,8 @@ module VbulletinScraper
45
43
  if postContent != nil
46
44
  postContentNoQuotes = Nokogiri::HTML.fragment(postContent.inner_html)
47
45
  postContentNoQuotes.search('div').remove
48
- return postContentNoQuotes.to_s
46
+ postContentNoQuotes.search('comment()').remove
47
+ return get_raw_text(postContentNoQuotes.to_s)
49
48
  end
50
49
  return ''
51
50
  end
@@ -90,51 +89,6 @@ module VbulletinScraper
90
89
  end
91
90
  return ''
92
91
  end
93
-
94
- def get_item_by_selector(selector)
95
- if @data != nil
96
- if @data.at_css(selector)
97
- return @data.at_css(selector)
98
- end
99
- end
100
- return nil
101
- end
102
-
103
- def get_items_by_selector(selector)
104
- if @data != nil
105
- if @data.css(selector)
106
- return @data.css(selector)
107
- end
108
- end
109
- end
110
-
111
- def get_item_by_selector_with_attribute(selector, attribute)
112
- if @data != nil
113
- if @data.at_css(selector)
114
- return @data.at_css(selector)[attribute]
115
- end
116
- end
117
- return nil
118
- end
119
-
120
- def get_raw_text(input)
121
- if input != nil
122
- return input.strip.gsub(/\u00a0/, ' ').gsub('\t', '')
123
- else
124
- return nil
125
- end
126
- end
127
-
128
- def get_int(input)
129
- if input != nil
130
- if input != ''
131
- begin
132
- return input.to_i
133
- end
134
- end
135
- end
136
- return 0
137
- end
138
92
  end
139
93
  end
140
94
  end
@@ -0,0 +1,23 @@
1
+ require_relative 'scraper'
2
+
3
+ module VbulletinScraper
4
+ module V4
5
+ class QuoteScraper < Scraper
6
+ def get_quote_author
7
+ quoteAuthor = get_item_by_selector('.bbcode_postedby strong')
8
+ if quoteAuthor != nil
9
+ return get_raw_text(quoteAuthor.text)
10
+ end
11
+ return ''
12
+ end
13
+
14
+ def get_quote_content
15
+ quoteContent = get_item_by_selector('.message')
16
+ if quoteContent != nil
17
+ return get_raw_text(quoteContent.text)
18
+ end
19
+ return ''
20
+ end
21
+ end
22
+ end
23
+ end
@@ -4,7 +4,9 @@ require 'open_uri_redirections'
4
4
 
5
5
  module VbulletinScraper
6
6
  module V4
7
- class QuoteScraper
7
+ class Scraper
8
+ attr_accessor :data
9
+
8
10
  def initialize(input)
9
11
  @data = nil
10
12
  if input.start_with? "http" || "www"
@@ -16,24 +18,8 @@ module VbulletinScraper
16
18
  end
17
19
  end
18
20
 
19
- def get_quote_author
20
- quoteAuthor = get_item_by_selector('.bbcode_postedby strong')
21
- if quoteAuthor != nil
22
- return get_raw_text(quoteAuthor.text)
23
- end
24
- return ''
25
- end
26
-
27
- def get_quote_content
28
- quoteContent = get_item_by_selector('.message')
29
- if quoteContent != nil
30
- return get_raw_text(quoteContent.text)
31
- end
32
- return ''
33
- end
34
-
35
21
  def get_item_by_selector(selector)
36
- if @data != nil
22
+ if !@data.nil?
37
23
  if @data.at_css(selector)
38
24
  return @data.at_css(selector)
39
25
  end
@@ -41,8 +27,16 @@ module VbulletinScraper
41
27
  return nil
42
28
  end
43
29
 
30
+ def get_items_by_selector(selector)
31
+ if !@data.nil?
32
+ if @data.css(selector)
33
+ return @data.css(selector)
34
+ end
35
+ end
36
+ end
37
+
44
38
  def get_item_by_selector_with_attribute(selector, attribute)
45
- if @data != nil
39
+ if !@data.nil?
46
40
  if @data.at_css(selector)
47
41
  return @data.at_css(selector)[attribute]
48
42
  end
@@ -51,12 +45,23 @@ module VbulletinScraper
51
45
  end
52
46
 
53
47
  def get_raw_text(input)
54
- if input != nil
48
+ if !input.nil?
55
49
  return input.strip.gsub(/\u00a0/, ' ')
56
50
  else
57
51
  return nil
58
52
  end
59
53
  end
54
+
55
+ def get_int(input)
56
+ if !input.nil?
57
+ if input != ''
58
+ begin
59
+ return input.to_i
60
+ end
61
+ end
62
+ end
63
+ return 0
64
+ end
60
65
  end
61
66
  end
62
67
  end
@@ -1,21 +1,8 @@
1
- require 'nokogiri'
2
- require 'open-uri'
3
- require 'open_uri_redirections'
1
+ require_relative 'scraper'
4
2
 
5
3
  module VbulletinScraper
6
4
  module V4
7
- class TopicScraper
8
- def initialize(input)
9
- @data = nil
10
- if input.start_with? "http" || "www"
11
- @data = Nokogiri::HTML(open(input, :allow_redirections => :all))
12
- @data.encoding = "UTF-8"
13
- else
14
- @data = Nokogiri::HTML(input)
15
- @data.encoding = "UTF-8"
16
- end
17
- end
18
-
5
+ class TopicScraper < Scraper
19
6
  def is_valid_vbulletin
20
7
  if get_vbulletin_version != ''
21
8
  return true
@@ -34,7 +21,7 @@ module VbulletinScraper
34
21
 
35
22
  def get_current_page_number
36
23
  if is_valid_vbulletin
37
- pageNumber = get_item_by_selector('#pagination_top a.popupctrl')
24
+ pageNumber = get_item_by_selector('#pagination_top a.popupctrl')
38
25
  if pageNumber != nil
39
26
  pageNumber = pageNumber.text.gsub('Page', '').gsub(' ', '').split('of').first
40
27
  return get_int(get_raw_text(pageNumber))
@@ -95,51 +82,6 @@ module VbulletinScraper
95
82
  return []
96
83
  end
97
84
  end
98
-
99
- def get_item_by_selector(selector)
100
- if @data != nil
101
- if @data.at_css(selector)
102
- return @data.at_css(selector)
103
- end
104
- end
105
- return nil
106
- end
107
-
108
- def get_items_by_selector(selector)
109
- if @data != nil
110
- if @data.css(selector)
111
- return @data.css(selector)
112
- end
113
- end
114
- end
115
-
116
- def get_item_by_selector_with_attribute(selector, attribute)
117
- if @data != nil
118
- if @data.at_css(selector)
119
- return @data.at_css(selector)[attribute]
120
- end
121
- end
122
- return nil
123
- end
124
-
125
- def get_raw_text(input)
126
- if input != nil
127
- return input.strip.gsub(/\u00a0/, ' ')
128
- else
129
- return nil
130
- end
131
- end
132
-
133
- def get_int(input)
134
- if input != nil
135
- if input != ''
136
- begin
137
- return input.to_i
138
- end
139
- end
140
- end
141
- return 0
142
- end
143
85
  end
144
86
  end
145
87
  end
@@ -1,3 +1,3 @@
1
1
  module VbulletinScraper
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.1"
3
3
  end
@@ -1,9 +1,9 @@
1
1
  require_relative 'vbulletin_scraper/version'
2
- require_relative 'configuration'
3
- require_relative 'V4/forum_scraper'
4
- require_relative 'V4/topic_scraper'
5
- require_relative 'V4/post_scraper'
6
- require_relative 'V4/quote_scraper'
2
+ require_relative 'vbulletin_scraper/configuration'
3
+ require_relative 'vbulletin_scraper/V4/forum_scraper'
4
+ require_relative 'vbulletin_scraper/V4/topic_scraper'
5
+ require_relative 'vbulletin_scraper/V4/post_scraper'
6
+ require_relative 'vbulletin_scraper/V4/quote_scraper'
7
7
 
8
8
  module VbulletinScraper
9
9
  class << self
@@ -4,24 +4,26 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'vbulletin_scraper/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "vbulletin_scraper"
8
- spec.version = VbulletinScraper::VERSION
9
- spec.authors = ["Ben Walters"]
10
- spec.email = ["walters.benj@gmail.com"]
7
+ spec.name = "vbulletin_scraper"
8
+ spec.version = VbulletinScraper::VERSION
9
+ spec.authors = ["Ben Walters"]
10
+ spec.email = ["walters.benj@gmail.com"]
11
11
 
12
- spec.summary = "This gem is designed to allow you to scrape compatible vBulletin forum threads for various data."
13
- spec.homepage = "https://github.com/bendrick92/vbulletin_scraper"
14
- spec.license = "MIT"
12
+ spec.summary = "This gem is designed to allow you to scrape compatible vBulletin forum threads for various data."
13
+ spec.homepage = "https://github.com/bendrick92/vbulletin_scraper"
14
+ spec.license = "MIT"
15
15
 
16
- spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
- spec.bindir = "exe"
18
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
- spec.require_paths = ["lib"]
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
+ spec.bindir = "exe"
18
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency 'nokogiri', '~> 1.6.8'
22
- spec.add_dependency 'open_uri_redirections'
23
-
24
- spec.add_development_dependency 'bundler', '~> 1.11'
25
- spec.add_development_dependency 'rake', '~> 10.0'
26
- spec.add_development_dependency 'rspec', '~> 3.2'
21
+ spec.add_dependency 'nokogiri', '~> 1.6.8'
22
+ spec.add_dependency 'open_uri_redirections'
23
+
24
+ spec.add_development_dependency "simplecov", "~> 0.12"
25
+ spec.add_development_dependency "codeclimate-test-reporter", "~> 0.6"
26
+ spec.add_development_dependency 'bundler', '~> 1.11'
27
+ spec.add_development_dependency 'rake', '~> 10.0'
28
+ spec.add_development_dependency 'rspec', '~> 3.2'
27
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vbulletin_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Walters
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-06 00:00:00.000000000 Z
11
+ date: 2016-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -38,6 +38,34 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: simplecov
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.12'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.12'
55
+ - !ruby/object:Gem::Dependency
56
+ name: codeclimate-test-reporter
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.6'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.6'
41
69
  - !ruby/object:Gem::Dependency
42
70
  name: bundler
43
71
  requirement: !ruby/object:Gem::Requirement
@@ -96,12 +124,13 @@ files:
96
124
  - Rakefile
97
125
  - bin/console
98
126
  - bin/setup
99
- - lib/V4/forum_scraper.rb
100
- - lib/V4/post_scraper.rb
101
- - lib/V4/quote_scraper.rb
102
- - lib/V4/topic_scraper.rb
103
- - lib/configuration.rb
104
127
  - lib/vbulletin_scraper.rb
128
+ - lib/vbulletin_scraper/V4/forum_scraper.rb
129
+ - lib/vbulletin_scraper/V4/post_scraper.rb
130
+ - lib/vbulletin_scraper/V4/quote_scraper.rb
131
+ - lib/vbulletin_scraper/V4/scraper.rb
132
+ - lib/vbulletin_scraper/V4/topic_scraper.rb
133
+ - lib/vbulletin_scraper/configuration.rb
105
134
  - lib/vbulletin_scraper/version.rb
106
135
  - vbulletin_scraper.gemspec
107
136
  homepage: https://github.com/bendrick92/vbulletin_scraper
@@ -1,101 +0,0 @@
1
- require 'nokogiri'
2
- require 'open-uri'
3
- require 'open_uri_redirections'
4
-
5
- module VbulletinScraper
6
- module V4
7
- class ForumScraper
8
- def initialize(input)
9
- @data = nil
10
- if input.start_with? "http" || "www"
11
- @data = Nokogiri::HTML(open(input, :allow_redirections => :all))
12
- @data.encoding = "UTF-8"
13
- else
14
- @data = Nokogiri::HTML(input)
15
- @data.encoding = "UTF-8"
16
- end
17
- end
18
-
19
- def is_valid_vbulletin
20
- if get_vbulletin_version != ''
21
- return true
22
- else
23
- return false
24
- end
25
- end
26
-
27
- def get_vbulletin_version
28
- vbulletinVersion = get_item_by_selector_with_attribute('meta[name="generator"]', 'content')
29
- if vbulletinVersion != nil
30
- return get_raw_text(vbulletinVersion.gsub('vBulletin', ''))
31
- end
32
- return ''
33
- end
34
-
35
- def get_forum_url
36
- pageUrl = get_item_by_selector_with_attribute('base', 'href')
37
- if pageUrl != nil
38
- return get_raw_text(pageUrl)
39
- end
40
- return ''
41
- end
42
-
43
- def get_forum_title
44
- forumTitle = get_item_by_selector_with_attribute('#logo img', 'alt')
45
- if forumTitle == ''
46
- forumTitle = get_item_by_selector_with_attribute('.logo img', 'title')
47
- end
48
- if forumTitle != nil
49
- return get_raw_text(forumTitle)
50
- else
51
- return ''
52
- end
53
- end
54
-
55
- def get_item_by_selector(selector)
56
- if @data != nil
57
- if @data.at_css(selector)
58
- return @data.at_css(selector)
59
- end
60
- end
61
- return nil
62
- end
63
-
64
- def get_items_by_selector(selector)
65
- if @data != nil
66
- if @data.css(selector)
67
- return @data.css(selector)
68
- end
69
- end
70
- end
71
-
72
- def get_item_by_selector_with_attribute(selector, attribute)
73
- if @data != nil
74
- if @data.at_css(selector)
75
- return @data.at_css(selector)[attribute]
76
- end
77
- end
78
- return nil
79
- end
80
-
81
- def get_raw_text(input)
82
- if input != nil
83
- return input.strip.gsub(/\u00a0/, ' ')
84
- else
85
- return nil
86
- end
87
- end
88
-
89
- def get_int(input)
90
- if input != nil
91
- if input != ''
92
- begin
93
- return input.to_i
94
- end
95
- end
96
- end
97
- return 0
98
- end
99
- end
100
- end
101
- end