tilde-scraper 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 233e57dca85aa06d060975d41dc93b3c42b4f8b5b780ea98bd3bc55a240fa821
4
- data.tar.gz: b7bdc67f83cf7a408ab46c18c37ac6043ed9d9efe27c46cce90b8643b7389509
3
+ metadata.gz: 0eebcc9c37f1a557411b9f036f34d4f6f2a71e7d9d13dd6848380a5b219aa9f5
4
+ data.tar.gz: 313dc490897b207cf45ec4004c7eb9be21f85fe8400f2b6221a510e696845b15
5
5
  SHA512:
6
- metadata.gz: 4b141b97282e2f2559fd7374c0a6267c11bdb0590635addc743868b17eb44de27908ce15d325ab774d3f133c9cbc1a3686944bd2df1962c2cf65520ade595102
7
- data.tar.gz: f55175e57c0273c2ce319c9d4dc8bc347c57e5499799b63f9a378d5d1e89001f80a1d4369f89733b96b26341a58769113c41e8f0c0ef2a0517426295c842995f
6
+ metadata.gz: 8274f406e2c357242a58f452b9f0b2a3fadc9a0297b1deab6d94f25feb58da0ef9522f4b177d21998f8e44d3e832932cb00d1b871f452d385d29015da09475f9
7
+ data.tar.gz: b44a96b15fe3d44d26e9c3acde82a5dcad4689f84d64f246fbe588a5c5edef5b69e617aa2606606e825242271f67f98ced110b7e62ec16c676a44712557d008b
@@ -1,4 +1,3 @@
1
1
  module TildeScraper
2
- @@page_id = 0
3
2
  end
4
3
  require_relative '../config/enviornment.rb'
@@ -1,12 +1,12 @@
1
1
  module TildeScraper
2
2
  @@page_id = 0
3
+ #Scrapes a page at url, creates topic objects for each topic, and returns a page object
3
4
  def self.get_page(url)
4
5
  data = TildeScraper::Scraper.scrape_page(url)
5
6
  #Set page_id in page data hash
6
7
  data[0][:page_id] = @@page_id
7
8
  #Create page object
8
9
  page = TildeScraper::Page.create(data[0])
9
-
10
10
  #Set page_id in all topic data hashes
11
11
  #Set group in all hashes if applicable
12
12
  data[1].each do |topic_hash|
@@ -21,6 +21,7 @@ module TildeScraper
21
21
  page
22
22
  end
23
23
 
24
+ #Scrapes a page for topics, and scrapes each topic's comments, returns a page object
24
25
  def self.get_page_with_comments(url)
25
26
  page = get_page(url)
26
27
  #Create comments for each topic
@@ -29,11 +30,13 @@ module TildeScraper
29
30
  end
30
31
  end
31
32
 
33
+ #Scrapes the group page for first level groups and returns an array of group objects
32
34
  def self.get_groups
33
35
  TildeScraper::Group.all.clear
34
- TildeScraper::Group.create_from_array(TildeScraper::Scraper.scrape_groups("/groups"))
36
+ TildeScraper::Group.create_from_array(TildeScraper::Scraper.scrape_groups("https://tildes.net/groups"))
35
37
  end
36
38
 
39
+ #Scrapes a topic page and returns an array of comment objects
37
40
  def self.get_comments(url)
38
41
  comment_array = TildeScraper::Scraper.scrape_comments(url)
39
42
  TildeScraper::Comment.create_from_array(comment_array)
@@ -47,7 +47,6 @@ class TildeScraper::Comment
47
47
  end
48
48
 
49
49
  def self.display(array, indent = 0)
50
- #binding.pry
51
50
  array.each do |comment|
52
51
  comment.display(indent)
53
52
  display(comment.children, indent + 1)
@@ -1,5 +1,4 @@
1
1
  class TildeScraper::Scraper
2
- BASE_URL = "https://tildes.net"
3
2
  #Returns an array with two elements.
4
3
  #the first a hash containing general page info
5
4
  #the second an array of hashes containing topic info
@@ -21,7 +20,7 @@ class TildeScraper::Scraper
21
20
  info = {
22
21
  title: title.text,
23
22
  comment_count: topic.css("div.topic-info-comments").text.strip,
24
- comment_link: topic.css("div.topic-info-comments a").attribute("href").value.split(" ").first,
23
+ comment_link: "https://tildes.net" + topic.css("div.topic-info-comments a").attribute("href").value.split(" ").first,
25
24
  group: metadata.css("span.topic-group").text,
26
25
  word_count: metadata.css("span.topic-content-metadata").text.split(" ")[0],
27
26
  age: topic.css("time.time-responsive").attribute("data-abbreviated").value,
@@ -40,7 +39,7 @@ class TildeScraper::Scraper
40
39
  end
41
40
 
42
41
  def self.scrape_groups(url)
43
- doc = open_url(BASE_URL + url)
42
+ doc = open_url(url)
44
43
  out = doc.css("tr.group-level-0").map do |group|
45
44
  {
46
45
  name: group.css("a").text,
@@ -52,7 +51,7 @@ class TildeScraper::Scraper
52
51
  end
53
52
 
54
53
  def self.scrape_comments(url)
55
- doc = open_url(BASE_URL + url)
54
+ doc = open_url(url)
56
55
  comments = doc.css("#comments")
57
56
  array = scrape_children(comments, url)
58
57
  array
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tilde-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Evans
@@ -52,8 +52,8 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.12'
55
- description: Scrapes the website tildes.net, can scrape topics on a page, contents
56
- or link of a topic, top level groups, and comments on a topic
55
+ description: Scrapes the website tildes.net. Can scrape topics on a page, top level
56
+ groups, and comments on a topic. Also has a very basic cli for browsing
57
57
  email: noah@nevans.me
58
58
  executables:
59
59
  - tilde-scraper