pointmd_comments 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b3f84ad7c180458bae9b8d07589647dae11825d3580bbbec79602fb22cafcb8a
4
+ data.tar.gz: 060d0ce0518fb48dbd105ca9c3084e687ced21d08c8e1ee1458e8a47eea8f7d7
5
+ SHA512:
6
+ metadata.gz: 77a227fd2f38df75955f865350451a8a905bdeaa2a7ed959822c8c0322499923e4e20ab1ea17e1fe19a1c974190d565150617c288a29dca5bc9326bb645bb686
7
+ data.tar.gz: b8584558d781b6cd09c566d3e2c6b1455686f40efe22edb7745867f6ece79de6b13f2b7ae6719010efb1bb22785e8070ebd703545d43bb12b588ab4ddf1a4b45
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require 'pointmd_comments'
3
+
4
+ PointmdComments.collect
@@ -0,0 +1,31 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'csv'
4
+ require 'watir'
5
+
6
+ require 'pointmd_comments/version'
7
+ require 'pointmd_comments/opt_parser'
8
+
9
+ require 'pointmd_comments/aggregators/main'
10
+ require 'pointmd_comments/aggregators/posts'
11
+ require 'pointmd_comments/aggregators/comments'
12
+
13
+ require 'pointmd_comments/errors/not_implemented'
14
+ require 'pointmd_comments/errors/unknown_source'
15
+
16
+ require 'pry'
17
+
18
# Top-level namespace of the gem and home of the command-line entry point.
module PointmdComments
  # Base error class for the gem.
  class Error < StandardError; end

  # Runs the command-line workflow: parses the options and hands them to
  # Aggregators::Main.
  #
  # Any StandardError is reported on stderr (class name, then message) instead
  # of being raised. The backtrace is added when `-v` or `--verbose` was passed.
  #
  # @return [Object, nil] the result of Aggregators::Main#call, or nil when an
  #   error was reported
  def self.collect
    # Snapshot ARGV before parsing: OptionParser#parse! removes the switches it
    # recognises, and the rescue clause still needs to see the verbose flag.
    args = ARGV.dup
    options = OptParser.new.parse
    Aggregators::Main.new(options).call
  rescue StandardError => e
    # $stderr.puts rather than warn, so the report is not silenced by -W0.
    $stderr.puts e.class, e.message
    $stderr.puts e.backtrace if args.include?('-v') || args.include?('--verbose')
  end
end
@@ -0,0 +1,21 @@
1
module PointmdComments
  module Aggregators
    # Extracts comment texts from a parsed post page.
    class Comments
      # @param page [#css] parsed document of a post page (Aggregators::Main
      #   passes the result of Nokogiri::HTML.parse)
      # @return [Array<String>] one text per comment node that yielded one;
      #   empty when the comments container is missing or has no children
      def call(page)
        parse_comments(page)
      end

      private

      # Walks the children of the first child of `div#simpals-comments-list`
      # and collects the text of each one.
      def parse_comments(page)
        list = page.css('div#simpals-comments-list').children.first
        # No container (or an empty one) means there is nothing to collect.
        return [] unless list

        list.children.map { |comment| comment_text(comment) }.compact
      end

      # Text of the last node found under the comment's <p> elements, or nil
      # when the node has none (such nodes are dropped by the caller).
      def comment_text(comment)
        comment.css('p').children.last&.text
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module PointmdComments
  module Aggregators
    # Orchestrates a run: asks the posts aggregator for post URLs, loads each
    # post in a headless Chrome browser, extracts its comments and writes all
    # of them to `output.csv`.
    class Main
      attr_reader :posts_aggregator, :browser, :comments_aggregator, :all_comments, :posts, :source, :path

      # Builds the collaborators and starts the browser.
      #
      # @param options [Hash] reads the :path and :source keys
      def initialize(options)
        @path = options[:path]
        @source = options[:source]
        @posts_aggregator = Aggregators::Posts.new(source: source, path: path)
        @comments_aggregator = Aggregators::Comments.new
        @browser = ::Watir::Browser.new :chrome, headless: true
        @all_comments = []
      end

      # Collects the comments of every post and writes them to `output.csv`.
      #
      # The browser is closed when the run ends, whether it succeeded or
      # raised, so an instance is meant for a single call.
      def call
        @posts = posts_aggregator.call

        collect_all_comments

        write_to_csv
      ensure
        # Always release the browser; otherwise it stays open after the run.
        browser.close
      end

      private

      # Appends one [absolute post URL, comment text] row per comment.
      def collect_all_comments
        posts.each do |url|
          post_comments = collect_comments_from(url)

          post_comments.each do |c|
            all_comments << ["https://point.md#{url}", c]
          end
        end
      end

      # Opens the post in the browser, waits for the comments container to be
      # present, then hands the rendered HTML to the comments aggregator.
      def collect_comments_from(url)
        puts "Collecting comments for #{url}.."

        browser.goto "https://point.md#{url}"
        browser.div(id: 'simpals-comments-list').wait_until(&:present?)

        page = Nokogiri::HTML.parse(browser.html)

        comments_aggregator.call(page)
      end

      # Writes every collected row to `output.csv` in the working directory,
      # overwriting an existing file.
      def write_to_csv
        CSV.open('output.csv', 'w') do |csv|
          all_comments.each { |c| csv << c }
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module PointmdComments
  module Aggregators
    # Collects the hrefs of the posts linked from the point.md front page.
    # Aggregators::Main prefixes each returned value with "https://point.md".
    class Posts
      # NOTE: This array may be populated with other website sections in the future.
      ALLOWED_SOURCES = %i[news today].freeze
      # NOTE(review): not referenced in this class; download_html uses its own literal URL.
      MAIN_PAGE = 'https://point.md/ru/'.freeze

      attr_reader :source, :urls

      # @param source [Symbol] section to read; must be one of ALLOWED_SOURCES
      # @param path [String, nil] stored but not used by this class
      def initialize(source:, path:)
        @source = source
        @path = path
        @urls = []
      end

      # Validates the source, fetches the post links and prints how many were found.
      #
      # @return [Array<String>] collected hrefs
      # @raise [ArgumentError] when the source is not in ALLOWED_SOURCES
      # @raise [Errors::NotImplemented] for the :today source
      def call
        validate_source
        fetch_posts
        puts "Found #{urls.count} links in the #{source} section.."

        @urls
      end

      private

      def validate_source
        return if ALLOWED_SOURCES.include? source

        raise ArgumentError, "Wrong source. Allowed sources are #{ALLOWED_SOURCES}"
      end

      # Dispatches on the source. The page is downloaded only for sources that
      # are implemented, so :today fails fast without a network request.
      def fetch_posts
        case source
        when :news
          @page = download_html
          fetch_news_posts
        when :today
          raise Errors::NotImplemented
        end
      end

      # Reads the link of the big (main) post and the block of small posts
      # from the `.post-blocks-wrap` section.
      def fetch_news_posts
        posts_block = @page.css('.post-blocks-wrap')
        main_post_heading = posts_block.children.css('.post-big-block').children.css('h2')

        # The main post may be absent; keep going with the small posts.
        main_link = main_post_heading.children.css('a').first
        main_post = main_link && main_link['href']
        other_posts = posts_block.children.css('.post-small-blocks-wrap')

        populate_urls(main_post, other_posts)
      end

      # Appends the main post (when present) followed by every small post
      # whose link can be extracted.
      def populate_urls(main_post, other_posts)
        @urls << main_post if main_post

        links = other_posts.children.map do |child|
          child.css('article').css('h2').children[1]['href']
        rescue StandardError
          # Best effort: children without the expected article/h2 structure are skipped.
          next
        end
        @urls.concat(links.compact)
      end

      # Downloads and parses the front page. URI.open is used because
      # Kernel#open no longer opens URLs on Ruby 3.0+; the block form releases
      # the response IO as soon as it has been parsed.
      def download_html
        URI.open('https://point.md/ru') { |io| Nokogiri::HTML(io) }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module PointmdComments
  module Errors
    # Error for functionality that is declared but not written yet;
    # Aggregators::Posts raises it for the :today source.
    NotImplemented = Class.new(StandardError)
  end
end
@@ -0,0 +1,13 @@
1
module PointmdComments
  module Errors
    # Raised when the requested source is not one of
    # Aggregators::Posts::ALLOWED_SOURCES.
    class UnknownSource < StandardError
      # The message is handed to StandardError itself, so #message, #to_s and
      # #full_message all agree, and `raise UnknownSource, 'text'` works too.
      #
      # @param msg [String, nil] custom message; defaults to a text listing the
      #   allowed sources
      def initialize(msg = nil)
        super(msg || "Unknown source specified. Allowed sources are: #{Aggregators::Posts::ALLOWED_SOURCES}")
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'optparse'
2
+ require 'pointmd_comments/version'
3
+
4
module PointmdComments
  # Parses the command-line switches of the `pointmd_comments` executable into
  # an options hash.
  class OptParser
    attr_reader :options

    def initialize
      @options = {}
    end

    # Parses the switches and returns the collected options.
    #
    # @param argv [Array<String>] arguments to parse; recognised switches are
    #   removed from it. Defaults to ARGV.
    # @return [Hash] may contain :verbose, :source (Symbol) and :path (String);
    #   :source falls back to :news when it was not given
    # @raise [Errors::UnknownSource] when the source is not an allowed one
    # @raise [OptionParser::ParseError] on malformed or unknown switches
    def parse(argv = ARGV)
      build_parser.parse!(argv)

      # Only fill in the default when no source was requested explicitly.
      set_default_source unless options[:source]
      validate_source

      options
    end

    private

    # rubocop:disable Metrics/MethodLength
    # Declares the supported switches; each handler stores its value in `options`.
    def build_parser
      OptionParser.new do |opts|
        opts.banner = 'Usage: pointmd_comments [options]'

        opts.on('-v', '--verbose', 'Show logs and backtraces') do |v|
          options[:verbose] = v
        end

        opts.on(
          '-sSOURCE',
          '--source=SOURCE',
          "A source to pull links from. Available sources are: #{Aggregators::Posts::ALLOWED_SOURCES}"
        ) do |s|
          options[:source] = s.to_sym
        end
        opts.on(
          '-p PATH',
          '--path=PATH',
          'A file path to pull links from.'
        ) do |p|
          # The path goes under its own key and stays a String.
          options[:path] = p
        end
        opts.on('-V', '--version', 'Version') do
          puts PointmdComments::VERSION
          exit
        end
      end
    end
    # rubocop:enable Metrics/MethodLength

    def set_default_source
      options[:source] = :news
    end

    def validate_source
      return if Aggregators::Posts::ALLOWED_SOURCES.include? options[:source]

      raise Errors::UnknownSource
    end
  end
end
@@ -0,0 +1,3 @@
1
module PointmdComments
  # Version of the gem; `pointmd_comments --version` prints it.
  VERSION = -'0.1.0'
end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pointmd_comments
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Nicolai Stoianov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-11-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pry
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubocop
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.1.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.1.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.10'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.10'
55
+ - !ruby/object:Gem::Dependency
56
+ name: watir
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: watir-webdriver
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: This gem lets you aggregate comments from point.md website into a csv
84
+ file.
85
+ email:
86
+ - stoianovnk@gmail.com
87
+ executables:
88
+ - pointmd_comments
89
+ extensions: []
90
+ extra_rdoc_files: []
91
+ files:
92
+ - bin/pointmd_comments
93
+ - lib/pointmd_comments.rb
94
+ - lib/pointmd_comments/aggregators/comments.rb
95
+ - lib/pointmd_comments/aggregators/main.rb
96
+ - lib/pointmd_comments/aggregators/posts.rb
97
+ - lib/pointmd_comments/errors/not_implemented.rb
98
+ - lib/pointmd_comments/errors/unknown_source.rb
99
+ - lib/pointmd_comments/opt_parser.rb
100
+ - lib/pointmd_comments/version.rb
101
+ homepage: https://github.com/ston1x/pointmd-comments
102
+ licenses:
103
+ - MIT
104
+ metadata:
105
+ allowed_push_host: https://rubygems.org
106
+ homepage_uri: https://github.com/ston1x/pointmd-comments
107
+ source_code_uri: https://github.com/ston1x/pointmd-comments
108
+ post_install_message:
109
+ rdoc_options: []
110
+ require_paths:
111
+ - lib
112
+ required_ruby_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '2.5'
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ requirements: []
123
+ rubygems_version: 3.1.2
124
+ signing_key:
125
+ specification_version: 4
126
+ summary: Point.md comments aggregator
127
+ test_files: []