vbulletin_scraper 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +106 -9
- data/lib/vbulletin_scraper/V4/post_scraper.rb +14 -10
- data/lib/vbulletin_scraper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 864d58a8ed37ef7a43ea97ecfe7a6d6ac1e1c245
|
4
|
+
data.tar.gz: f25f3b3b12a2a1f12d9654ac7a9a257d52397f03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 81f5afbd989d699a161c367328a38b037beb45750900b030605ef6685569cc54ef2c4feff07866930b04b47ce7826f5557e82bfb1ca9771c3977e4f52a0eb9d4
|
7
|
+
data.tar.gz: 10d86908627c6375001e4f93ac4a17aa14ecd1f184c7d56afac31ec9b0e7d7de5574e231f5c82808f468ea3729973f3f0018f96b9f97511e01cefb78c40e9e04
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# VbulletinScraper
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
VbulletinScraper is a Ruby gem designed to parse compatible vBulletin forums, threads, and posts. Currently only vBulletin forums v4.x are supported.
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -20,19 +18,118 @@ Or install it yourself as:
|
|
20
18
|
|
21
19
|
$ gem install vbulletin_scraper
|
22
20
|
|
23
|
-
## Usage
|
24
21
|
|
25
|
-
|
22
|
+
## Available Functions
|
23
|
+
|
24
|
+
### Forums
|
25
|
+
|
26
|
+
is_valid_vbulletin
|
27
|
+
get_vbulletin_version
|
28
|
+
get_forum_url
|
29
|
+
get_forum_title
|
30
|
+
|
31
|
+
### Threads (Topics)
|
32
|
+
|
33
|
+
is_valid_vbulletin
|
34
|
+
get_vbulletin_version
|
35
|
+
get_current_page_number
|
36
|
+
get_total_page_count
|
37
|
+
get_vbulletin_topic_id
|
38
|
+
get_topic_url
|
39
|
+
get_topic_title
|
40
|
+
get_posts
|
41
|
+
|
42
|
+
### Posts
|
26
43
|
|
27
|
-
|
44
|
+
get_vbulletin_post_id
|
45
|
+
get_post_author
|
46
|
+
get_post_content_raw
|
47
|
+
get_post_content
|
48
|
+
get_post_submit_datetime
|
49
|
+
get_quotes
|
50
|
+
get_post_permalink
|
51
|
+
|
52
|
+
### Quotes
|
53
|
+
|
54
|
+
get_quote_author
|
55
|
+
get_quote_content
|
56
|
+
|
57
|
+
|
58
|
+
## Usage
|
28
59
|
|
29
|
-
|
60
|
+
At the top of your Ruby file:
|
61
|
+
|
62
|
+
require 'vbulletin_scraper'
|
63
|
+
|
64
|
+
Example usage:
|
65
|
+
|
66
|
+
forumScraper = VbulletinScraper::V4::ForumScraper.new('http://someforum.com')
|
67
|
+
isVbulletinForum = forumScraper.is_valid_vbulletin
|
68
|
+
vbulletinVersion = forumScraper.get_vbulletin_version
|
69
|
+
forumTitle = forumScraper.get_forum_title
|
70
|
+
forumUrl = forumScraper.get_forum_url
|
71
|
+
|
72
|
+
topicScraper = VbulletinScraper::V4::TopicScraper.new('http://someforum.com/forum/some-thread-section/1234-a-sample-thread')
|
73
|
+
topicTitle = topicScraper.get_topic_title
|
74
|
+
topicUrl = topicScraper.get_topic_url
|
75
|
+
posts = topicScraper.get_posts
|
76
|
+
|
77
|
+
posts.each do |post|
|
78
|
+
postScraper = VbulletinScraper::V4::PostScraper.new(post.to_s)
|
79
|
+
vbulletinPostId = postScraper.get_vbulletin_post_id
|
80
|
+
postAuthor = postScraper.get_post_author
|
81
|
+
postContent = postScraper.get_post_content
|
82
|
+
postSubmitDate = postScraper.get_post_submit_datetime
|
83
|
+
postPermalink = postScraper.get_post_permalink
|
84
|
+
quotes = postScraper.get_quotes
|
85
|
+
|
86
|
+
quotes.each do |quote|
|
87
|
+
quoteScraper = VbulletinScraper::V4::QuoteScraper.new(quote.to_s)
|
88
|
+
quoteAuthor = quoteScraper.get_quote_author
|
89
|
+
quoteContent = quoteScraper.get_quote_content
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
To grab multiple pages of posts (starting with latest):
|
94
|
+
|
95
|
+
url = 'http://someforum.com/forum/some-thread-section/1234-a-sample-thread'
|
96
|
+
currPage = 9999
|
97
|
+
postCounter = 0
|
98
|
+
maxPosts = 10
|
99
|
+
stop = false
|
100
|
+
|
101
|
+
until stop do
|
102
|
+
currPageUrl = url + '?page=' + currPage.to_s
|
103
|
+
currPageScraper = VbulletinScraper::V4::TopicScraper.new(currPageUrl)
|
104
|
+
currPagePosts = currPageScraper.get_posts
|
105
|
+
|
106
|
+
currPagePosts.each do |post|
|
107
|
+
postScraper = VbulletinScraper::V4::PostScraper.new(post.to_s)
|
108
|
+
|
109
|
+
# etc...
|
110
|
+
|
111
|
+
postCounter += 1
|
112
|
+
|
113
|
+
if postCounter >= maxPosts
|
114
|
+
stop = true
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
if currPage > 1
|
119
|
+
currPage -= 1
|
120
|
+
else
|
121
|
+
stop = true
|
122
|
+
end
|
123
|
+
end
|
30
124
|
|
31
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
125
|
|
33
126
|
## Contributing
|
34
127
|
|
35
|
-
|
128
|
+
1. Fork it ( https://github.com/bendrick92/vbulletin_scraper/fork )
|
129
|
+
2. Create your feature branch (git checkout -b my-new-feature)
|
130
|
+
3. Commit your changes (git commit -am 'Add some feature')
|
131
|
+
4. Push to the branch (git push origin my-new-feature)
|
132
|
+
5. Create a new Pull Request
|
36
133
|
|
37
134
|
|
38
135
|
## License
|
@@ -57,16 +57,20 @@ module VbulletinScraper
|
|
57
57
|
rawDateTimeString = get_item_by_selector('.date')
|
58
58
|
if rawDateTimeString != nil
|
59
59
|
rawDateTimeString = get_raw_text(rawDateTimeString.text)
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
60
|
+
begin
|
61
|
+
if rawDateTimeString.include? 'Yesterday'
|
62
|
+
rawDateTimeString = rawDateTimeString.gsub('Yesterday', '')
|
63
|
+
formattedDateTimeString = Date.yesterday.strftime(dateFormat) + rawDateTimeString
|
64
|
+
submitDateTime = DateTime.strptime(formattedDateTimeString, dateTimeFormat)
|
65
|
+
elsif rawDateTimeString.include? 'Today'
|
66
|
+
rawDateTimeString = rawDateTimeString.gsub('Today', '')
|
67
|
+
formattedDateTimeString = Date.today.strftime(dateFormat) + rawDateTimeString
|
68
|
+
submitDateTime = DateTime.strptime(formattedDateTimeString, dateTimeFormat)
|
69
|
+
else
|
70
|
+
submitDateTime = DateTime.strptime(rawDateTimeString, dateTimeFormat)
|
71
|
+
end
|
72
|
+
rescue ArgumentError
|
73
|
+
submitDateTime = nil
|
70
74
|
end
|
71
75
|
return submitDateTime
|
72
76
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vbulletin_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Walters
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|