pointmd_comments 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/pointmd_comments +4 -0
- data/lib/pointmd_comments.rb +31 -0
- data/lib/pointmd_comments/aggregators/comments.rb +21 -0
- data/lib/pointmd_comments/aggregators/main.rb +53 -0
- data/lib/pointmd_comments/aggregators/posts.rb +67 -0
- data/lib/pointmd_comments/errors/not_implemented.rb +5 -0
- data/lib/pointmd_comments/errors/unknown_source.rb +13 -0
- data/lib/pointmd_comments/opt_parser.rb +60 -0
- data/lib/pointmd_comments/version.rb +3 -0
- metadata +127 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b3f84ad7c180458bae9b8d07589647dae11825d3580bbbec79602fb22cafcb8a
|
4
|
+
data.tar.gz: 060d0ce0518fb48dbd105ca9c3084e687ced21d08c8e1ee1458e8a47eea8f7d7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 77a227fd2f38df75955f865350451a8a905bdeaa2a7ed959822c8c0322499923e4e20ab1ea17e1fe19a1c974190d565150617c288a29dca5bc9326bb645bb686
|
7
|
+
data.tar.gz: b8584558d781b6cd09c566d3e2c6b1455686f40efe22edb7745867f6ece79de6b13f2b7ae6719010efb1bb22785e8070ebd703545d43bb12b588ab4ddf1a4b45
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'csv'
|
4
|
+
require 'watir'
|
5
|
+
|
6
|
+
require 'pointmd_comments/version'
|
7
|
+
require 'pointmd_comments/opt_parser'
|
8
|
+
|
9
|
+
require 'pointmd_comments/aggregators/main'
|
10
|
+
require 'pointmd_comments/aggregators/posts'
|
11
|
+
require 'pointmd_comments/aggregators/comments'
|
12
|
+
|
13
|
+
require 'pointmd_comments/errors/not_implemented'
|
14
|
+
require 'pointmd_comments/errors/unknown_source'
|
15
|
+
|
16
|
+
require 'pry'
|
17
|
+
|
18
|
+
module PointmdComments
  # Base error class for the gem.
  class Error < StandardError; end

  # Command-line entry point: parses options, runs the main aggregator and
  # reports any failure on stdout instead of letting it propagate.
  #
  # On failure the error class and message are always printed; the backtrace
  # is printed only when a verbose flag (-v or --verbose) was passed.
  #
  # @return [Object, nil] the aggregator's result, or nil when an error was rescued
  def self.collect
    # Option parsing consumes ARGV, so keep a copy to be able to check for
    # the verbose flag from inside the rescue clause.
    args = ARGV.dup
    options = OptParser.new.parse
    Aggregators::Main.new(options).call
  rescue StandardError => e
    puts e.class, e.message
    puts e.backtrace if (args & %w[-v --verbose]).any?
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module PointmdComments
  module Aggregators
    # Extracts comment texts from a parsed post page.
    class Comments
      # @param page [#css] parsed HTML document of a post page
      # @return [Array<String>] comment texts; empty when the comments
      #   container is missing or has no children
      def call(page)
        parse_comments(page)
      end

      private

      # Walks the children of the first node inside the comments list and
      # collects the text of each one, skipping nodes that yield nothing.
      def parse_comments(page)
        container = page.css('div#simpals-comments-list').children.first
        # Without a container there is nothing to parse; previously this
        # case raised NoMethodError on nil.
        return [] unless container

        container.children.map { |comment| comment_text(comment) }.compact
      end

      # Text of the last child found under the comment's <p> elements.
      # Best effort: any node that does not have that shape yields nil.
      def comment_text(comment)
        comment.css('p').children.last.text
      rescue StandardError
        nil
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module PointmdComments
  module Aggregators
    # Orchestrates a run: asks the posts aggregator for post URLs, loads each
    # post in a headless Chrome browser, extracts its comments and writes all
    # [post URL, comment] pairs to output.csv in the current directory.
    class Main
      attr_reader :posts_aggregator, :browser, :comments_aggregator, :all_comments, :posts, :source, :path

      # @param options [Hash] parsed CLI options; reads :path and :source
      def initialize(options)
        @path = options[:path]
        @source = options[:source]
        @posts_aggregator = Aggregators::Posts.new(source: source, path: path)
        @comments_aggregator = Aggregators::Comments.new
        @browser = ::Watir::Browser.new :chrome, headless: true
        @all_comments = []
      end

      # Collects comments for every post and writes them to the CSV file.
      # The browser is closed afterwards, even when a step raises, so no
      # headless Chrome process is left behind. As a consequence an instance
      # is meant for a single run.
      #
      # @return [Array<Array<String>>] the collected [url, comment] rows
      def call
        @posts = posts_aggregator.call

        collect_all_comments

        write_to_csv
      ensure
        browser.close
      end

      private

      # Appends one [absolute post URL, comment] row per comment of each post.
      def collect_all_comments
        posts.each do |url|
          collect_comments_from(url).each do |comment|
            all_comments << [absolute_url(url), comment]
          end
        end
      end

      # Opens the post in the browser, waits for the comments list to be
      # present and hands the rendered HTML to the comments aggregator.
      def collect_comments_from(url)
        puts "Collecting comments for #{url}.."

        browser.goto absolute_url(url)
        browser.div(id: 'simpals-comments-list').wait_until(&:present?)

        comments_aggregator.call(Nokogiri::HTML.parse(browser.html))
      end

      # Post URLs come from the posts aggregator as site-relative paths.
      def absolute_url(url)
        "https://point.md#{url}"
      end

      # Overwrites output.csv with all collected rows.
      def write_to_csv
        CSV.open('output.csv', 'w') do |csv|
          all_comments.each { |row| csv << row }
        end
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module PointmdComments
  module Aggregators
    # Collects links to posts from a section of the point.md main page.
    class Posts
      # NOTE: This array may be populated with other website sections in the future.
      ALLOWED_SOURCES = %i[news today].freeze
      MAIN_PAGE = 'https://point.md/ru/'.freeze

      attr_reader :source, :urls

      # @param source [Symbol] section to pull links from, one of ALLOWED_SOURCES
      # @param path [String, nil] stored but not used by this class yet
      def initialize(source:, path:)
        @source = source
        @path = path
        @urls = []
      end

      # Validates the source, fetches the post links and reports how many
      # were found.
      #
      # @return [Array<String>] post hrefs as they appear on the page
      # @raise [ArgumentError] when the source is not in ALLOWED_SOURCES
      # @raise [Errors::NotImplemented] for the :today source
      def call
        validate_source
        fetch_posts
        puts "Found #{urls.count} links in the #{source} section.."

        @urls
      end

      private

      def validate_source
        return if ALLOWED_SOURCES.include? source

        raise ArgumentError, "Wrong source. Allowed sources are #{ALLOWED_SOURCES}"
      end

      # Dispatches on the source. The page is downloaded only for sources
      # that are actually implemented, so :today fails without a request.
      def fetch_posts
        case source
        when :news
          @page = download_html
          fetch_news_posts
        when :today
          raise Errors::NotImplemented
        end
      end

      # Finds the link of the big (main) post and the wrapper holding the
      # small posts, then stores all links.
      def fetch_news_posts
        posts_block = @page.css('.post-blocks-wrap')
        main_post_heading = posts_block.children.css('.post-big-block').children.css('h2')

        main_post = main_post_heading.children.css('a').first['href']
        other_posts = posts_block.children.css('.post-small-blocks-wrap')

        populate_urls(main_post, other_posts)
      end

      # Replaces (rather than appends to) the URL list so that calling the
      # aggregator again does not accumulate duplicates.
      def populate_urls(main_post, other_posts)
        other_links = other_posts.children.map { |child| post_link(child) }.compact

        @urls = [main_post, *other_links]
      end

      # Href of the second child of the article heading. Best effort: nodes
      # that do not have that shape yield nil and are dropped by the caller.
      def post_link(child)
        child.css('article').css('h2').children[1]['href']
      rescue StandardError
        nil
      end

      # Kernel#open no longer opens URLs on Ruby 3.0+, so go through
      # URI.open (open-uri); the block form closes the stream after parsing.
      def download_html
        URI.open('https://point.md/ru') { |io| Nokogiri::HTML(io) }
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module PointmdComments
  module Errors
    # Raised when the requested source is not one of
    # Aggregators::Posts::ALLOWED_SOURCES.
    class UnknownSource < StandardError
      # The message is handed to StandardError so that #message, #to_s and
      # #full_message all report the same text.
      #
      # @param msg [String, nil] custom message; defaults to a text listing
      #   the allowed sources
      def initialize(msg = nil)
        super(msg || "Unknown source specified. Allowed sources are: #{Aggregators::Posts::ALLOWED_SOURCES}")
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'pointmd_comments/version'
|
3
|
+
|
4
|
+
module PointmdComments
  # Parses the command-line options of the pointmd_comments executable.
  class OptParser
    attr_reader :options

    def initialize
      @options = {}
    end

    # Parses the given arguments into the options hash.
    #
    # Recognised keys: :verbose, :source (Symbol) and :path (String). When no
    # source is given it defaults to :news; an explicitly given source is
    # never overridden.
    #
    # @param argv [Array<String>] arguments to parse; recognised options are
    #   removed from it. Defaults to ARGV.
    # @return [Hash] the parsed options
    # @raise [Errors::UnknownSource] when the source is not an allowed one
    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    def parse(argv = ARGV)
      OptionParser.new do |opts|
        opts.banner = 'Usage: pointmd_comments [options]'

        opts.on('-v', '--verbose', 'Show logs and backtraces') do |v|
          options[:verbose] = v
        end

        opts.on(
          '-sSOURCE',
          '--source=SOURCE',
          "A source to pull links from. Available sources are: #{Aggregators::Posts::ALLOWED_SOURCES}"
        ) do |s|
          options[:source] = s.to_sym
        end
        opts.on(
          '-p PATH',
          '--path=PATH',
          'A file path to pull links from.'
        ) do |path|
          # Stored under :path; it used to overwrite :source by mistake.
          options[:path] = path
        end
        opts.on('-V', '--version', 'Version') do
          puts PointmdComments::VERSION
          exit
        end
      end.parse!(argv)

      set_default_source unless options[:source]
      validate_source

      options
    end
    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize

    private

    def set_default_source
      options[:source] = :news
    end

    def validate_source
      return if Aggregators::Posts::ALLOWED_SOURCES.include? options[:source]

      raise Errors::UnknownSource
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pointmd_comments
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Nicolai Stoianov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-11-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: pry
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rubocop
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.1.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.1.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.10'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.10'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: watir
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: watir-webdriver
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: This gem lets you aggregate comments from point.md website into a csv
|
84
|
+
file.
|
85
|
+
email:
|
86
|
+
- stoianovnk@gmail.com
|
87
|
+
executables:
|
88
|
+
- pointmd_comments
|
89
|
+
extensions: []
|
90
|
+
extra_rdoc_files: []
|
91
|
+
files:
|
92
|
+
- bin/pointmd_comments
|
93
|
+
- lib/pointmd_comments.rb
|
94
|
+
- lib/pointmd_comments/aggregators/comments.rb
|
95
|
+
- lib/pointmd_comments/aggregators/main.rb
|
96
|
+
- lib/pointmd_comments/aggregators/posts.rb
|
97
|
+
- lib/pointmd_comments/errors/not_implemented.rb
|
98
|
+
- lib/pointmd_comments/errors/unknown_source.rb
|
99
|
+
- lib/pointmd_comments/opt_parser.rb
|
100
|
+
- lib/pointmd_comments/version.rb
|
101
|
+
homepage: https://github.com/ston1x/pointmd-comments
|
102
|
+
licenses:
|
103
|
+
- MIT
|
104
|
+
metadata:
|
105
|
+
allowed_push_host: https://rubygems.org
|
106
|
+
homepage_uri: https://github.com/ston1x/pointmd-comments
|
107
|
+
source_code_uri: https://github.com/ston1x/pointmd-comments
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '2.5'
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
requirements: []
|
123
|
+
rubygems_version: 3.1.2
|
124
|
+
signing_key:
|
125
|
+
specification_version: 4
|
126
|
+
summary: Point.md comments aggregator
|
127
|
+
test_files: []
|