pointmd_comments 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/pointmd_comments +4 -0
- data/lib/pointmd_comments.rb +31 -0
- data/lib/pointmd_comments/aggregators/comments.rb +21 -0
- data/lib/pointmd_comments/aggregators/main.rb +53 -0
- data/lib/pointmd_comments/aggregators/posts.rb +67 -0
- data/lib/pointmd_comments/errors/not_implemented.rb +5 -0
- data/lib/pointmd_comments/errors/unknown_source.rb +13 -0
- data/lib/pointmd_comments/opt_parser.rb +60 -0
- data/lib/pointmd_comments/version.rb +3 -0
- metadata +127 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b3f84ad7c180458bae9b8d07589647dae11825d3580bbbec79602fb22cafcb8a
|
4
|
+
data.tar.gz: 060d0ce0518fb48dbd105ca9c3084e687ced21d08c8e1ee1458e8a47eea8f7d7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 77a227fd2f38df75955f865350451a8a905bdeaa2a7ed959822c8c0322499923e4e20ab1ea17e1fe19a1c974190d565150617c288a29dca5bc9326bb645bb686
|
7
|
+
data.tar.gz: b8584558d781b6cd09c566d3e2c6b1455686f40efe22edb7745867f6ece79de6b13f2b7ae6719010efb1bb22785e8070ebd703545d43bb12b588ab4ddf1a4b45
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'csv'
|
4
|
+
require 'watir'
|
5
|
+
|
6
|
+
require 'pointmd_comments/version'
|
7
|
+
require 'pointmd_comments/opt_parser'
|
8
|
+
|
9
|
+
require 'pointmd_comments/aggregators/main'
|
10
|
+
require 'pointmd_comments/aggregators/posts'
|
11
|
+
require 'pointmd_comments/aggregators/comments'
|
12
|
+
|
13
|
+
require 'pointmd_comments/errors/not_implemented'
|
14
|
+
require 'pointmd_comments/errors/unknown_source'
|
15
|
+
|
16
|
+
require 'pry'
|
17
|
+
|
18
|
+
module PointmdComments
  # Base error class for the gem.
  class Error < StandardError; end

  # Command-line entry point: parses the options from ARGV and runs the main
  # aggregator with them.
  #
  # Any StandardError raised along the way is reported on standard output
  # (class name, then message) instead of propagating. The backtrace is printed
  # as well when the verbose flag (-v or --verbose) was given.
  #
  # NOTE(review): the original author marked this method as "DOESN'T WORK!!"
  # without saying what is broken -- confirm before relying on it.
  #
  # @return [Object, nil] the result of Aggregators::Main#call, or nil when an
  #   error was rescued
  def self.collect
    # Snapshot the arguments before option parsing: the parser may consume
    # ARGV, and the rescue clause below still needs to see the verbose flag.
    args = ARGV.dup
    options = OptParser.new.parse
    Aggregators::Main.new(options).call
  rescue StandardError => e
    puts e.class, e.message
    # Honour both spellings of the verbose switch, not only the short one.
    puts e.backtrace if args.include?('-v') || args.include?('--verbose')
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module PointmdComments
  module Aggregators
    # Extracts comment texts from a parsed post page.
    class Comments
      # CSS selector of the element that holds the comments of a post.
      COMMENTS_LIST_SELECTOR = 'div#simpals-comments-list'.freeze

      # @param page [#css] parsed document of a post page (anything that
      #   answers #css the way a Nokogiri document does)
      # @return [Array<String>] the comment texts found on the page; empty
      #   when the comments container has no children
      def call(page)
        parse_comments(page)
      end

      private

      # Collects the text of every comment under the first child of the
      # comments container. Entries whose text cannot be extracted are skipped.
      def parse_comments(page)
        wrapper = page.css(COMMENTS_LIST_SELECTOR).children.first
        # A post without comments has an empty container; treat that as
        # "no comments" instead of failing on nil.
        return [] if wrapper.nil?

        wrapper.children.map { |comment| comment_text(comment) }.compact
      end

      # Returns the text of the last node inside the comment's <p> elements,
      # or nil when the node does not have the expected structure
      # (deliberate best-effort: such nodes are simply ignored).
      def comment_text(comment)
        comment.css('p').children.last.text
      rescue StandardError
        nil
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module PointmdComments
  module Aggregators
    # Orchestrates a run: asks the posts aggregator for post links, visits each
    # post in a headless Chrome browser, extracts its comments and writes all
    # [post URL, comment] pairs to a CSV file.
    class Main
      # Prefix prepended to every post link to build the URL that is visited.
      BASE_URL = 'https://point.md'.freeze
      # File (relative to the working directory) the result is written to.
      OUTPUT_FILE = 'output.csv'.freeze
      # id of the element that must be present before the page is parsed.
      COMMENTS_LIST_ID = 'simpals-comments-list'.freeze

      attr_reader :posts_aggregator, :browser, :comments_aggregator, :all_comments, :posts, :source, :path

      # Starts the browser immediately, so every instance should be followed
      # by a #call, which is what shuts the browser down again.
      #
      # @param options [Hash] parsed options; :source and :path are read
      def initialize(options)
        @path = options[:path]
        @source = options[:source]
        @posts_aggregator = Aggregators::Posts.new(source: source, path: path)
        @comments_aggregator = Aggregators::Comments.new
        @browser = ::Watir::Browser.new :chrome, headless: true
        @all_comments = []
      end

      # Runs the whole aggregation and writes the CSV file.
      #
      # The browser is closed when the run ends, whether it succeeded or
      # raised, so the instance is single-use.
      #
      # @return [Object] whatever the CSV writing step returns
      def call
        @posts = posts_aggregator.call

        collect_all_comments

        write_to_csv
      ensure
        # Without this the headless browser outlives the run.
        browser.close
      end

      private

      # Appends one [full post URL, comment text] row per comment of every post.
      def collect_all_comments
        posts.each do |url|
          collect_comments_from(url).each do |comment|
            all_comments << ["#{BASE_URL}#{url}", comment]
          end
        end
      end

      # Loads the post in the browser, waits for the comments container to be
      # present and hands the parsed HTML to the comments aggregator.
      def collect_comments_from(url)
        puts "Collecting comments for #{url}.."

        browser.goto "#{BASE_URL}#{url}"
        browser.div(id: COMMENTS_LIST_ID).wait_until(&:present?)

        page = Nokogiri::HTML.parse(browser.html)

        comments_aggregator.call(page)
      end

      # Writes every collected row to OUTPUT_FILE, replacing previous content.
      def write_to_csv
        CSV.open(OUTPUT_FILE, 'w') do |csv|
          all_comments.each { |row| csv << row }
        end
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module PointmdComments
  module Aggregators
    # Collects the links of the posts shown in a section of the main page.
    class Posts
      # NOTE: This array may be populated with other website sections in the future.
      ALLOWED_SOURCES = %i[news today].freeze
      # Page the post links are scraped from.
      MAIN_PAGE = 'https://point.md/ru/'.freeze

      attr_reader :source, :urls

      # @param source [Symbol] section to read links from; must be one of
      #   ALLOWED_SOURCES
      # @param path [String, nil] stored, but not used by this class yet
      def initialize(source:, path:)
        @source = source
        @path = path
        @urls = []
      end

      # Validates the source, downloads the main page and gathers post links.
      #
      # @return [Array<String>] the collected links (href values as found in
      #   the page)
      # @raise [ArgumentError] when the source is not in ALLOWED_SOURCES
      # @raise [Errors::NotImplemented] for the :today source
      def call
        validate_source
        fetch_posts
        puts "Found #{urls.count} links in the #{source} section.."

        @urls
      end

      private

      def validate_source
        return if ALLOWED_SOURCES.include? source

        raise ArgumentError, "Wrong source. Allowed sources are #{ALLOWED_SOURCES}"
      end

      # Downloads the page, then dispatches on the requested section.
      def fetch_posts
        @page = download_html
        case source
        when :news
          fetch_news_posts
        when :today
          raise Errors::NotImplemented
        end
      end

      # Reads the link of the big (main) post and the block of small posts
      # from the posts wrapper and stores their links.
      def fetch_news_posts
        posts_block = @page.css('.post-blocks-wrap')
        main_post_heading = posts_block.children.css('.post-big-block').children.css('h2')

        main_post = main_post_heading.children.css('a').first['href']
        other_posts = posts_block.children.css('.post-small-blocks-wrap')

        populate_urls(main_post, other_posts)
      end

      # Adds the main post link followed by the link of every small post.
      # Children that do not have the expected structure are skipped.
      def populate_urls(main_post, other_posts)
        @urls << main_post

        @urls += other_posts.children.map do |child|
          child.css('article').css('h2').children[1]['href']
        rescue StandardError
          next
        end.compact
      end

      # Fetches and parses MAIN_PAGE.
      #
      # URI.open (from open-uri) is used explicitly: Kernel#open no longer
      # opens URLs on Ruby 3.0 and later.
      def download_html
        Nokogiri::HTML(URI.open(MAIN_PAGE))
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module PointmdComments
  module Errors
    # Raised when the requested source is not one of the allowed sources.
    class UnknownSource < StandardError
      # The message is handed to StandardError itself, so #message, #to_s and
      # the default error output all report the same text.
      #
      # @param message [String, nil] optional custom message; when omitted, a
      #   default message listing Aggregators::Posts::ALLOWED_SOURCES is used
      def initialize(message = nil)
        super(message || "Unknown source specified. Allowed sources are: #{Aggregators::Posts::ALLOWED_SOURCES}")
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'pointmd_comments/version'
|
3
|
+
|
4
|
+
module PointmdComments
  # Parses the command-line options of the pointmd_comments executable.
  class OptParser
    # Source used when none is given on the command line.
    DEFAULT_SOURCE = :news

    attr_reader :options

    def initialize
      @options = {}
    end

    # Parses ARGV destructively (recognised switches are removed from it).
    #
    # Recognised options:
    #   -v, --verbose        sets options[:verbose]
    #   -s, --source=SOURCE  sets options[:source] (as a Symbol)
    #   -p, --path=PATH      sets options[:path]
    #   -V, --version        prints the gem version and exits
    #
    # @return [Hash] the parsed options; :source falls back to DEFAULT_SOURCE
    #   when it was not given
    # @raise [Errors::UnknownSource] when the source is not one of
    #   Aggregators::Posts::ALLOWED_SOURCES
    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    def parse
      OptionParser.new do |opts|
        opts.banner = 'Usage: pointmd_comments [options]'

        opts.on('-v', '--verbose', 'Show logs and backtraces') do |v|
          options[:verbose] = v
        end

        opts.on(
          '-sSOURCE',
          '--source=SOURCE',
          "A source to pull links from. Available sources are: #{Aggregators::Posts::ALLOWED_SOURCES}"
        ) do |s|
          options[:source] = s.to_sym
        end
        opts.on(
          '-p PATH',
          '--path=PATH',
          'A file path to pull links from.'
        ) do |p|
          # The path is its own option; it must not overwrite the source.
          options[:path] = p
        end
        opts.on('-V', '--version', 'Version') do
          puts PointmdComments::VERSION
          exit
        end
      end.parse!

      # Only fall back to the default when no source was requested, so an
      # explicit --source is kept (and then validated below).
      set_default_source unless options[:source]
      validate_source

      options
    end
    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize

    private

    def set_default_source
      options[:source] = DEFAULT_SOURCE
    end

    def validate_source
      return if Aggregators::Posts::ALLOWED_SOURCES.include? options[:source]

      raise Errors::UnknownSource
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pointmd_comments
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Nicolai Stoianov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-11-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: pry
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rubocop
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.1.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.1.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.10'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: watir
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: watir-webdriver
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: This gem lets you aggregate comments from point.md website into a csv
|
84
|
+
file.
|
85
|
+
email:
|
86
|
+
- stoianovnk@gmail.com
|
87
|
+
executables:
|
88
|
+
- pointmd_comments
|
89
|
+
extensions: []
|
90
|
+
extra_rdoc_files: []
|
91
|
+
files:
|
92
|
+
- bin/pointmd_comments
|
93
|
+
- lib/pointmd_comments.rb
|
94
|
+
- lib/pointmd_comments/aggregators/comments.rb
|
95
|
+
- lib/pointmd_comments/aggregators/main.rb
|
96
|
+
- lib/pointmd_comments/aggregators/posts.rb
|
97
|
+
- lib/pointmd_comments/errors/not_implemented.rb
|
98
|
+
- lib/pointmd_comments/errors/unknown_source.rb
|
99
|
+
- lib/pointmd_comments/opt_parser.rb
|
100
|
+
- lib/pointmd_comments/version.rb
|
101
|
+
homepage: https://github.com/ston1x/pointmd-comments
|
102
|
+
licenses:
|
103
|
+
- MIT
|
104
|
+
metadata:
|
105
|
+
allowed_push_host: https://rubygems.org
|
106
|
+
homepage_uri: https://github.com/ston1x/pointmd-comments
|
107
|
+
source_code_uri: https://github.com/ston1x/pointmd-comments
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '2.5'
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
requirements: []
|
123
|
+
rubygems_version: 3.1.2
|
124
|
+
signing_key:
|
125
|
+
specification_version: 4
|
126
|
+
summary: Point.md comments aggregator
|
127
|
+
test_files: []
|