tilde-scraper 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tilde_scraper.rb +0 -1
- data/lib/tilde_scraper/api.rb +5 -2
- data/lib/tilde_scraper/comment.rb +0 -1
- data/lib/tilde_scraper/scraper.rb +3 -4
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0eebcc9c37f1a557411b9f036f34d4f6f2a71e7d9d13dd6848380a5b219aa9f5
|
4
|
+
data.tar.gz: 313dc490897b207cf45ec4004c7eb9be21f85fe8400f2b6221a510e696845b15
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8274f406e2c357242a58f452b9f0b2a3fadc9a0297b1deab6d94f25feb58da0ef9522f4b177d21998f8e44d3e832932cb00d1b871f452d385d29015da09475f9
|
7
|
+
data.tar.gz: b44a96b15fe3d44d26e9c3acde82a5dcad4689f84d64f246fbe588a5c5edef5b69e617aa2606606e825242271f67f98ced110b7e62ec16c676a44712557d008b
|
data/lib/tilde_scraper.rb
CHANGED
data/lib/tilde_scraper/api.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module TildeScraper
|
2
2
|
@@page_id = 0
|
3
|
+
#Scrapes a page at url, creates topic objects for each topic, and returns a page object
|
3
4
|
def self.get_page(url)
|
4
5
|
data = TildeScraper::Scraper.scrape_page(url)
|
5
6
|
#Set page_id in page data hash
|
6
7
|
data[0][:page_id] = @@page_id
|
7
8
|
#Create page object
|
8
9
|
page = TildeScraper::Page.create(data[0])
|
9
|
-
|
10
10
|
#Set page_id in all topic data hashes
|
11
11
|
#Set group in all hashes if applicible
|
12
12
|
data[1].each do |topic_hash|
|
@@ -21,6 +21,7 @@ module TildeScraper
|
|
21
21
|
page
|
22
22
|
end
|
23
23
|
|
24
|
+
#Scrapes a page for topics, and scrapes each topic's comments, returns a page object
|
24
25
|
def self.get_page_with_comments(url)
|
25
26
|
page = get_page(url)
|
26
27
|
#Create comments for each topic
|
@@ -29,11 +30,13 @@ module TildeScraper
|
|
29
30
|
end
|
30
31
|
end
|
31
32
|
|
33
|
+
#Scrapes the group page for first level groups and returns an array of group objects
|
32
34
|
def self.get_groups
|
33
35
|
TildeScraper::Group.all.clear
|
34
|
-
TildeScraper::Group.create_from_array(TildeScraper::Scraper.scrape_groups("/groups"))
|
36
|
+
TildeScraper::Group.create_from_array(TildeScraper::Scraper.scrape_groups("https://tildes.net/groups"))
|
35
37
|
end
|
36
38
|
|
39
|
+
#Scrapes a topic page and returns an array of comment objects
|
37
40
|
def self.get_comments(url)
|
38
41
|
comment_array = TildeScraper::Scraper.scrape_comments(url)
|
39
42
|
TildeScraper::Comment.create_from_array(comment_array)
|
data/lib/tilde_scraper/comment.rb
CHANGED
data/lib/tilde_scraper/scraper.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
class TildeScraper::Scraper
|
2
|
-
BASE_URL = "https://tildes.net"
|
3
2
|
#Returns an array with two elements.
|
4
3
|
#the first a hash containing general page info
|
5
4
|
#the secound an array of hashes containing topic info
|
@@ -21,7 +20,7 @@ class TildeScraper::Scraper
|
|
21
20
|
info = {
|
22
21
|
title: title.text,
|
23
22
|
comment_count: topic.css("div.topic-info-comments").text.strip,
|
24
|
-
comment_link: topic.css("div.topic-info-comments a").attribute("href").value.split(" ").first,
|
23
|
+
comment_link: "https://tildes.net" + topic.css("div.topic-info-comments a").attribute("href").value.split(" ").first,
|
25
24
|
group: metadata.css("span.topic-group").text,
|
26
25
|
word_count: metadata.css("span.topic-content-metadata").text.split(" ")[0],
|
27
26
|
age: topic.css("time.time-responsive").attribute("data-abbreviated").value,
|
@@ -40,7 +39,7 @@ class TildeScraper::Scraper
|
|
40
39
|
end
|
41
40
|
|
42
41
|
def self.scrape_groups(url)
|
43
|
-
doc = open_url(
|
42
|
+
doc = open_url(url)
|
44
43
|
out = doc.css("tr.group-level-0").map do |group|
|
45
44
|
{
|
46
45
|
name: group.css("a").text,
|
@@ -52,7 +51,7 @@ class TildeScraper::Scraper
|
|
52
51
|
end
|
53
52
|
|
54
53
|
def self.scrape_comments(url)
|
55
|
-
doc = open_url(
|
54
|
+
doc = open_url(url)
|
56
55
|
comments = doc.css("#comments")
|
57
56
|
array = scrape_children(comments, url)
|
58
57
|
array
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tilde-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Evans
|
@@ -52,8 +52,8 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0.12'
|
55
|
-
description: Scrapes the website tildes.net
|
56
|
-
|
55
|
+
description: Scrapes the website tildes.net. Can scrape topics on a page, top level
|
56
|
+
groups, and comments on a topic. Also has a very basic cli for browsing
|
57
57
|
email: noah@nevans.me
|
58
58
|
executables:
|
59
59
|
- tilde-scraper
|