vbulletin_scraper 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +106 -9
- data/lib/vbulletin_scraper/V4/post_scraper.rb +14 -10
- data/lib/vbulletin_scraper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 864d58a8ed37ef7a43ea97ecfe7a6d6ac1e1c245
|
4
|
+
data.tar.gz: f25f3b3b12a2a1f12d9654ac7a9a257d52397f03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 81f5afbd989d699a161c367328a38b037beb45750900b030605ef6685569cc54ef2c4feff07866930b04b47ce7826f5557e82bfb1ca9771c3977e4f52a0eb9d4
|
7
|
+
data.tar.gz: 10d86908627c6375001e4f93ac4a17aa14ecd1f184c7d56afac31ec9b0e7d7de5574e231f5c82808f468ea3729973f3f0018f96b9f97511e01cefb78c40e9e04
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# VbulletinScraper
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
VbulletinScraper is a Ruby gem designed to parse compatible vBulletin forums, threads, and posts. Currently only vBulletin forums v4.x are supported.
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -20,19 +18,118 @@ Or install it yourself as:
|
|
20
18
|
|
21
19
|
$ gem install vbulletin_scraper
|
22
20
|
|
23
|
-
## Usage
|
24
21
|
|
25
|
-
|
22
|
+
## Available Functions
|
23
|
+
|
24
|
+
### Forums
|
25
|
+
|
26
|
+
is_valid_vbulletin
|
27
|
+
get_vbulletin_version
|
28
|
+
get_forum_url
|
29
|
+
get_forum_title
|
30
|
+
|
31
|
+
### Threads (Topics)
|
32
|
+
|
33
|
+
is_valid_vbulletin
|
34
|
+
get_vbulletin_version
|
35
|
+
get_current_page_number
|
36
|
+
get_total_page_count
|
37
|
+
get_vbulletin_topic_id
|
38
|
+
get_topic_url
|
39
|
+
get_topic_title
|
40
|
+
get_posts
|
41
|
+
|
42
|
+
### Posts
|
26
43
|
|
27
|
-
|
44
|
+
get_vbulletin_post_id
|
45
|
+
get_post_author
|
46
|
+
get_post_content_raw
|
47
|
+
get_post_content
|
48
|
+
get_post_submit_datetime
|
49
|
+
get_quotes
|
50
|
+
get_post_permalink
|
51
|
+
|
52
|
+
### Quotes
|
53
|
+
|
54
|
+
get_quote_author
|
55
|
+
get_quote_content
|
56
|
+
|
57
|
+
|
58
|
+
## Usage
|
28
59
|
|
29
|
-
|
60
|
+
At the top of your Ruby file:
|
61
|
+
|
62
|
+
require 'vbulletin_scraper'
|
63
|
+
|
64
|
+
Example usage:
|
65
|
+
|
66
|
+
forumScraper = VbulletinScraper::V4::ForumScraper.new('http://someforum.com')
|
67
|
+
isVbulletinForum = forumScraper.is_valid_vbulletin
|
68
|
+
vbulletinVersion = forumScraper.get_vbulletin_version
|
69
|
+
forumTitle = forumScraper.get_forum_title
|
70
|
+
forumUrl = forumScraper.get_forum_url
|
71
|
+
|
72
|
+
topicScraper = VbulletinScraper::V4::TopicScraper.new('http://someforum.com/forum/some-thread-section/1234-a-sample-thread')
|
73
|
+
topicTitle = topicScraper.get_topic_title
|
74
|
+
topicUrl = topicScraper.get_topic_url
|
75
|
+
posts = topicScraper.get_posts
|
76
|
+
|
77
|
+
posts.each do |post|
|
78
|
+
postScraper = VbulletinScraper::V4::PostScraper.new(post.to_s)
|
79
|
+
vbulletinPostId = postScraper.get_vbulletin_post_id
|
80
|
+
postAuthor = postScraper.get_post_author
|
81
|
+
postContent = postScraper.get_post_content
|
82
|
+
postSubmitDate = postScraper.get_post_submit_datetime
|
83
|
+
postPermalink = postScraper.get_post_permalink
|
84
|
+
quotes = postScraper.get_quotes
|
85
|
+
|
86
|
+
quotes.each do |quote|
|
87
|
+
quoteScraper = VbulletinScraper::V4::QuoteScraper.new(quote.to_s)
|
88
|
+
quoteAuthor = quoteScraper.get_quote_author
|
89
|
+
quoteContent = quoteScraper.get_quote_content
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
To grab multiple pages of posts (starting with latest):
|
94
|
+
|
95
|
+
url = 'http://someforum.com/forum/some-thread-section/1234-a-sample-thread'
|
96
|
+
currPage = 9999
|
97
|
+
postCounter = 0
|
98
|
+
maxPosts = 10
|
99
|
+
stop = false
|
100
|
+
|
101
|
+
until stop do
|
102
|
+
currPageUrl = url + '?page=' + currPage.to_s
|
103
|
+
currPageScraper = VbulletinScraper::V4::TopicScraper.new(currPageUrl)
|
104
|
+
currPagePosts = currPageScraper.get_posts
|
105
|
+
|
106
|
+
currPagePosts.each do |post|
|
107
|
+
postScraper = VbulletinScraper::V4::PostScraper.new(post.to_s)
|
108
|
+
|
109
|
+
# etc...
|
110
|
+
|
111
|
+
postCounter += 1
|
112
|
+
|
113
|
+
if postCounter >= maxPosts
|
114
|
+
stop = true
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
if currPage > 1
|
119
|
+
currPage -= 1
|
120
|
+
else
|
121
|
+
stop = true
|
122
|
+
end
|
123
|
+
end
|
30
124
|
|
31
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
125
|
|
33
126
|
## Contributing
|
34
127
|
|
35
|
-
|
128
|
+
1. Fork it ( https://github.com/bendrick92/vbulletin_scraper/fork )
|
129
|
+
2. Create your feature branch (git checkout -b my-new-feature)
|
130
|
+
3. Commit your changes (git commit -am 'Add some feature')
|
131
|
+
4. Push to the branch (git push origin my-new-feature)
|
132
|
+
5. Create a new Pull Request
|
36
133
|
|
37
134
|
|
38
135
|
## License
|
data/lib/vbulletin_scraper/V4/post_scraper.rb
CHANGED
@@ -57,16 +57,20 @@ module VbulletinScraper
|
|
57
57
|
rawDateTimeString = get_item_by_selector('.date')
|
58
58
|
if rawDateTimeString != nil
|
59
59
|
rawDateTimeString = get_raw_text(rawDateTimeString.text)
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
60
|
+
begin
|
61
|
+
if rawDateTimeString.include? 'Yesterday'
|
62
|
+
rawDateTimeString = rawDateTimeString.gsub('Yesterday', '')
|
63
|
+
formattedDateTimeString = Date.yesterday.strftime(dateFormat) + rawDateTimeString
|
64
|
+
submitDateTime = DateTime.strptime(formattedDateTimeString, dateTimeFormat)
|
65
|
+
elsif rawDateTimeString.include? 'Today'
|
66
|
+
rawDateTimeString = rawDateTimeString.gsub('Today', '')
|
67
|
+
formattedDateTimeString = Date.today.strftime(dateFormat) + rawDateTimeString
|
68
|
+
submitDateTime = DateTime.strptime(formattedDateTimeString, dateTimeFormat)
|
69
|
+
else
|
70
|
+
submitDateTime = DateTime.strptime(rawDateTimeString, dateTimeFormat)
|
71
|
+
end
|
72
|
+
rescue ArgumentError
|
73
|
+
submitDateTime = nil
|
70
74
|
end
|
71
75
|
return submitDateTime
|
72
76
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vbulletin_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Walters
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|