pointmd_comments 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fca56a444c6f4243e673339bf85174fbb71dd2a959d3ad2b52b9728fbd598dbe
|
4
|
+
data.tar.gz: 208e3c91a53f539a08cf9d6a3dec0606f25851ac576898c9caa2975c08251018
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 170db4ed751423521520ce60d48fd30c40dd8212685c07672e749c90ccea7ef8a688e58a7f7bd4880885c621188e8414e883878669c65b83e43861d861dd1a37
|
7
|
+
data.tar.gz: cd3966ec082acbcbd6484b586593ea9b8b1c08fe3cba235c8c2292ef48044454575a39bd7f171afb0930effa215c55a72d4d60dc9ea0950e4965527dd24f474c
|
data/bin/pointmd_comments
CHANGED
data/lib/pointmd_comments.rb
CHANGED
@@ -16,7 +16,11 @@ require 'pointmd_comments/errors/unknown_source'
|
|
16
16
|
module PointmdComments
|
17
17
|
class Error < StandardError; end
|
18
18
|
|
19
|
-
def self.collect
|
19
|
+
def self.collect(options = {})
|
20
|
+
Aggregators::Main.new(options).call
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.collect_from_shell
|
20
24
|
args = ARGV.dup
|
21
25
|
options = OptParser.new.parse
|
22
26
|
Aggregators::Main.new(options).call
|
@@ -1,16 +1,23 @@
|
|
1
1
|
module PointmdComments
|
2
2
|
module Aggregators
|
3
3
|
class Main
|
4
|
+
CHROME_ARGS = %w[disable-dev-shm-usage disable-software-rasterizer no-sandbox].freeze
|
5
|
+
|
4
6
|
attr_reader :posts_aggregator, :browser, :comments_aggregator, :all_comments, :posts, :source, :output, :path
|
5
7
|
|
6
8
|
def initialize(options)
|
7
9
|
# Currently 'path' is not supported
|
8
10
|
@path = nil
|
9
|
-
@output = options[:output]
|
10
|
-
@source = options[:source]
|
11
|
+
@output = options[:output] || default_output_path
|
12
|
+
@source = options[:source] || Aggregators::Posts::DEFAULT_SOURCE
|
11
13
|
@posts_aggregator = Aggregators::Posts.new(source: source, path: path)
|
12
14
|
@comments_aggregator = Aggregators::Comments.new
|
13
|
-
|
15
|
+
|
16
|
+
client = Selenium::WebDriver::Remote::Http::Default.new
|
17
|
+
# NOTE: #timeout= is deprecated, use #read_timeout= and #open_timeout= instead
|
18
|
+
client.timeout = 600 # instead of the default 60 (seconds)
|
19
|
+
|
20
|
+
@browser = ::Watir::Browser.new :chrome, http_client: client, headless: true, args: CHROME_ARGS
|
14
21
|
@all_comments = []
|
15
22
|
end
|
16
23
|
|
@@ -46,7 +53,9 @@ module PointmdComments
|
|
46
53
|
end
|
47
54
|
|
48
55
|
def write_to_csv
|
49
|
-
|
56
|
+
# File#expand_path is needed to process paths like '~/test.txt' => '/Users/me/test.txt'
|
57
|
+
file_path = File.expand_path(output)
|
58
|
+
puts "File Path is #{file_path}"
|
50
59
|
|
51
60
|
CSV.open(file_path, 'w') do |csv|
|
52
61
|
all_comments.each { |c| csv << c }
|
@@ -3,6 +3,7 @@ module PointmdComments
|
|
3
3
|
class Posts
|
4
4
|
# NOTE: This array may be populated with other website sections in the future.
|
5
5
|
ALLOWED_SOURCES = %i[news today].freeze
|
6
|
+
DEFAULT_SOURCE = :news
|
6
7
|
MAIN_PAGE = 'https://point.md/ru/'.freeze
|
7
8
|
|
8
9
|
attr_reader :source, :urls
|
@@ -10,7 +11,6 @@ module PointmdComments
|
|
10
11
|
def initialize(source:, path:)
|
11
12
|
@source = source
|
12
13
|
@path = path
|
13
|
-
@urls = []
|
14
14
|
end
|
15
15
|
|
16
16
|
def call
|
@@ -40,22 +40,12 @@ module PointmdComments
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def fetch_news_posts
|
43
|
-
|
44
|
-
|
43
|
+
# NOTE: .post-blocks-wrap does not exist anymore
|
44
|
+
# Find <article> tags instead
|
45
|
+
articles = @page.css('article')
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
populate_urls(main_post, other_posts)
|
50
|
-
end
|
51
|
-
|
52
|
-
def populate_urls(main_post, other_posts)
|
53
|
-
@urls << main_post
|
54
|
-
|
55
|
-
@urls += other_posts.children.map do |child|
|
56
|
-
child.css('article').css('h2').children[1]['href']
|
57
|
-
rescue StandardError
|
58
|
-
next
|
47
|
+
@urls = articles.map do |article|
|
48
|
+
article.css('a').attribute('href').to_s
|
59
49
|
end.compact
|
60
50
|
end
|
61
51
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pointmd_comments
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nicolai Stoianov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: pry
|
@@ -120,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
120
120
|
- !ruby/object:Gem::Version
|
121
121
|
version: '0'
|
122
122
|
requirements: []
|
123
|
-
rubygems_version: 3.
|
123
|
+
rubygems_version: 3.2.3
|
124
124
|
signing_key:
|
125
125
|
specification_version: 4
|
126
126
|
summary: Point.md comments aggregator
|