pointmd_comments 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 564a71620179d2b409489f0b96711361c2dfceb07d268bbb47fb4e164934de35
4
- data.tar.gz: 2c77c09da91a7ccaad9f429107633cd04dce8747945b5063e45ddb9bf71e7414
3
+ metadata.gz: fca56a444c6f4243e673339bf85174fbb71dd2a959d3ad2b52b9728fbd598dbe
4
+ data.tar.gz: 208e3c91a53f539a08cf9d6a3dec0606f25851ac576898c9caa2975c08251018
5
5
  SHA512:
6
- metadata.gz: 211f18075aed368f744d3fb7d902849b09f642f8ac9275fe47a2b5ec6fb9a47acf327528f36941373b00f26d9f606685f4c372c88c35f5c06a8f46292823985d
7
- data.tar.gz: fa6938170ae9ed200a6b19ef385c0751c2bf03f48150bbb1ae591fab8ab01912c738a7d9cdd94b19c21a735a0356579f4579bbabd40aff2d2d5f2a0470450fed
6
+ metadata.gz: 170db4ed751423521520ce60d48fd30c40dd8212685c07672e749c90ccea7ef8a688e58a7f7bd4880885c621188e8414e883878669c65b83e43861d861dd1a37
7
+ data.tar.gz: cd3966ec082acbcbd6484b586593ea9b8b1c08fe3cba235c8c2292ef48044454575a39bd7f171afb0930effa215c55a72d4d60dc9ea0950e4965527dd24f474c
data/bin/pointmd_comments CHANGED
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'pointmd_comments'
3
3
 
4
- PointmdComments.collect
4
+ PointmdComments.collect_from_shell
@@ -16,7 +16,11 @@ require 'pointmd_comments/errors/unknown_source'
16
16
  module PointmdComments
17
17
  class Error < StandardError; end
18
18
 
19
- def self.collect
19
+ def self.collect(options = {})
20
+ Aggregators::Main.new(options).call
21
+ end
22
+
23
+ def self.collect_from_shell
20
24
  args = ARGV.dup
21
25
  options = OptParser.new.parse
22
26
  Aggregators::Main.new(options).call
@@ -1,16 +1,23 @@
1
1
  module PointmdComments
2
2
  module Aggregators
3
3
  class Main
4
+ CHROME_ARGS = %w[disable-dev-shm-usage disable-software-rasterizer no-sandbox].freeze
5
+
4
6
  attr_reader :posts_aggregator, :browser, :comments_aggregator, :all_comments, :posts, :source, :output, :path
5
7
 
6
8
  def initialize(options)
7
9
  # Currently 'path' is not supported
8
10
  @path = nil
9
- @output = options[:output]
10
- @source = options[:source]
11
+ @output = options[:output] || default_output_path
12
+ @source = options[:source] || Aggregators::Posts::DEFAULT_SOURCE
11
13
  @posts_aggregator = Aggregators::Posts.new(source: source, path: path)
12
14
  @comments_aggregator = Aggregators::Comments.new
13
- @browser = ::Watir::Browser.new :chrome, headless: true
15
+
16
+ client = Selenium::WebDriver::Remote::Http::Default.new
17
+ # NOTE: #timeout= is deprecated, use #read_timeout= and #open_timeout= instead
18
+ client.timeout = 600 # instead of the default 60 (seconds)
19
+
20
+ @browser = ::Watir::Browser.new :chrome, http_client: client, headless: true, args: CHROME_ARGS
14
21
  @all_comments = []
15
22
  end
16
23
 
@@ -46,7 +53,9 @@ module PointmdComments
46
53
  end
47
54
 
48
55
  def write_to_csv
49
- file_path = output || default_output_path
56
+ # File.expand_path is needed to resolve paths like '~/test.txt' => '/Users/me/test.txt'
57
+ file_path = File.expand_path(output)
58
+ puts "File Path is #{file_path}"
50
59
 
51
60
  CSV.open(file_path, 'w') do |csv|
52
61
  all_comments.each { |c| csv << c }
@@ -3,6 +3,7 @@ module PointmdComments
3
3
  class Posts
4
4
  # NOTE: This array may be populated with other website sections in the future.
5
5
  ALLOWED_SOURCES = %i[news today].freeze
6
+ DEFAULT_SOURCE = :news
6
7
  MAIN_PAGE = 'https://point.md/ru/'.freeze
7
8
 
8
9
  attr_reader :source, :urls
@@ -10,7 +11,6 @@ module PointmdComments
10
11
  def initialize(source:, path:)
11
12
  @source = source
12
13
  @path = path
13
- @urls = []
14
14
  end
15
15
 
16
16
  def call
@@ -40,22 +40,12 @@ module PointmdComments
40
40
  end
41
41
 
42
42
  def fetch_news_posts
43
- posts_block = @page.css('.post-blocks-wrap')
44
- main_post_heading = posts_block.children.css('.post-big-block').children.css('h2')
43
+ # NOTE: .post-blocks-wrap does not exist anymore
44
+ # Find <article> tags instead
45
+ articles = @page.css('article')
45
46
 
46
- main_post = main_post_heading.children.css('a').first['href']
47
- other_posts = posts_block.children.css('.post-small-blocks-wrap')
48
-
49
- populate_urls(main_post, other_posts)
50
- end
51
-
52
- def populate_urls(main_post, other_posts)
53
- @urls << main_post
54
-
55
- @urls += other_posts.children.map do |child|
56
- child.css('article').css('h2').children[1]['href']
57
- rescue StandardError
58
- next
47
+ @urls = articles.map do |article|
48
+ article.css('a').attribute('href').to_s
59
49
  end.compact
60
50
  end
61
51
 
@@ -1,3 +1,3 @@
1
1
  module PointmdComments
2
- VERSION = '0.1.3'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pointmd_comments
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nicolai Stoianov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-11 00:00:00.000000000 Z
11
+ date: 2021-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pry
@@ -120,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
120
120
  - !ruby/object:Gem::Version
121
121
  version: '0'
122
122
  requirements: []
123
- rubygems_version: 3.1.2
123
+ rubygems_version: 3.2.3
124
124
  signing_key:
125
125
  specification_version: 4
126
126
  summary: Point.md comments aggregator