pointmd_comments 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b3f84ad7c180458bae9b8d07589647dae11825d3580bbbec79602fb22cafcb8a
4
+ data.tar.gz: 060d0ce0518fb48dbd105ca9c3084e687ced21d08c8e1ee1458e8a47eea8f7d7
5
+ SHA512:
6
+ metadata.gz: 77a227fd2f38df75955f865350451a8a905bdeaa2a7ed959822c8c0322499923e4e20ab1ea17e1fe19a1c974190d565150617c288a29dca5bc9326bb645bb686
7
+ data.tar.gz: b8584558d781b6cd09c566d3e2c6b1455686f40efe22edb7745867f6ece79de6b13f2b7ae6719010efb1bb22785e8070ebd703545d43bb12b588ab4ddf1a4b45
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
# Executable entry point: load the gem's library file.
+ require 'pointmd_comments'
3
+
4
# Run the command-line workflow (option parsing, aggregation, error reporting).
+ PointmdComments.collect
@@ -0,0 +1,31 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'csv'
4
+ require 'watir'
5
+
6
+ require 'pointmd_comments/version'
7
+ require 'pointmd_comments/opt_parser'
8
+
9
+ require 'pointmd_comments/aggregators/main'
10
+ require 'pointmd_comments/aggregators/posts'
11
+ require 'pointmd_comments/aggregators/comments'
12
+
13
+ require 'pointmd_comments/errors/not_implemented'
14
+ require 'pointmd_comments/errors/unknown_source'
15
+
16
+ require 'pry'
17
+
18
# Top-level namespace of the gem; exposes the command-line entry point.
module PointmdComments
  # Base class for gem-specific errors.
  class Error < StandardError; end

  # Parses the command-line options, runs the main aggregator and reports any
  # failure on standard error instead of letting it propagate.
  #
  # The backtrace is printed only when `-v` or `--verbose` was passed.
  #
  # @return [Object, nil] the aggregator's result, or nil when an error was reported
  def self.collect
    # Snapshot ARGV first: the option parser removes the flags it recognises
    # (it uses the destructive `parse!`), so the verbose flag may already be
    # gone by the time an error is rescued.
    args = ARGV.dup
    options = OptParser.new.parse
    Aggregators::Main.new(options).call
  rescue StandardError => e
    warn e.class.to_s, e.message
    warn e.backtrace.join("\n") if args.include?('-v') || args.include?('--verbose')
  end
end
@@ -0,0 +1,21 @@
1
module PointmdComments
  module Aggregators
    # Extracts comment texts from a parsed post page.
    class Comments
      # CSS selector of the element that wraps the list of comments.
      COMMENTS_LIST_SELECTOR = 'div#simpals-comments-list'.freeze

      # Collects the text of every comment found on the page.
      #
      # @param page [#css] parsed document of a post page
      # @return [Array<String>] comment texts; empty when the page has no comment list
      def call(page)
        parse_comments(page)
      end

      private

      # Walks the children of the first node inside the comments list and
      # keeps the text of each one that yields any.
      def parse_comments(page)
        list = page.css(COMMENTS_LIST_SELECTOR).children.first
        # A page without a (populated) comments list simply has no comments.
        return [] unless list

        list.children.map { |comment| comment_text(comment) }.compact
      end

      # Returns the text of the last node inside the comment's paragraphs, or
      # nil when the comment has none.
      def comment_text(comment)
        comment.css('p').children.last&.text
      rescue StandardError
        # Best effort, as before: one malformed node must not abort the page.
        nil
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
module PointmdComments
  module Aggregators
    # Orchestrates a run: fetches the post links, collects the comments of
    # every post through a headless browser and writes them to a CSV file.
    class Main
      # Prefix that turns a post path into an absolute URL.
      BASE_URL = 'https://point.md'.freeze
      # File (relative to the working directory) the comments are written to.
      OUTPUT_FILE = 'output.csv'.freeze
      # `id` of the element that wraps the list of comments.
      COMMENTS_LIST_ID = 'simpals-comments-list'.freeze

      attr_reader :posts_aggregator, :browser, :comments_aggregator, :all_comments, :posts, :source, :path

      # @param options [Hash] parsed options; `:source` and `:path` are read
      def initialize(options)
        @path = options[:path]
        @source = options[:source]
        @posts_aggregator = Aggregators::Posts.new(source: source, path: path)
        @comments_aggregator = Aggregators::Comments.new
        @browser = ::Watir::Browser.new :chrome, headless: true
        @all_comments = []
      end

      # Runs the whole pipeline and writes the CSV file.
      #
      # The browser is closed afterwards even when a step fails, so the
      # instance is meant to be used for a single run.
      def call
        @posts = posts_aggregator.call

        collect_all_comments

        write_to_csv
      ensure
        browser&.close
      end

      private

      # Appends one [post URL, comment] row per comment of every post.
      def collect_all_comments
        posts.each do |url|
          link = absolute_url(url)
          collect_comments_from(url).each { |comment| all_comments << [link, comment] }
        end
      end

      # Opens the post in the browser, waits for the comments list to be
      # present and returns the comment texts parsed from the rendered HTML.
      def collect_comments_from(url)
        puts "Collecting comments for #{url}.."

        browser.goto absolute_url(url)
        browser.div(id: COMMENTS_LIST_ID).wait_until(&:present?)

        comments_aggregator.call(Nokogiri::HTML.parse(browser.html))
      end

      # Builds the absolute URL of a post from its path.
      def absolute_url(url)
        "#{BASE_URL}#{url}"
      end

      # Writes every collected row to OUTPUT_FILE, replacing its content.
      def write_to_csv
        CSV.open(OUTPUT_FILE, 'w') do |csv|
          all_comments.each { |row| csv << row }
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module PointmdComments
  module Aggregators
    # Collects the links of the posts shown in a section of the main page.
    class Posts
      # NOTE: This array may be populated with other website sections in the future.
      ALLOWED_SOURCES = %i[news today].freeze
      # Page the post links are scraped from.
      MAIN_PAGE = 'https://point.md/ru/'.freeze

      attr_reader :source, :urls

      # @param source [Symbol] section to read; must be one of ALLOWED_SOURCES
      # @param path [String, nil] stored but not used by this class yet
      def initialize(source:, path:)
        @source = source
        @path = path
        @urls = []
      end

      # Validates the source, fetches the post links and reports how many
      # were found.
      #
      # @return [Array<String>] `href` values of the posts found
      # @raise [ArgumentError] when the source is not allowed
      # @raise [Errors::NotImplemented] for the `:today` source
      def call
        validate_source
        fetch_posts
        puts "Found #{urls.count} links in the #{source} section.."

        @urls
      end

      private

      def validate_source
        return if ALLOWED_SOURCES.include? source

        raise ArgumentError, "Wrong source. Allowed sources are #{ALLOWED_SOURCES}"
      end

      # Dispatches on the source. The page is downloaded only for sources
      # that are actually implemented, so `:today` fails without a request.
      def fetch_posts
        case source
        when :news
          @page = download_html
          fetch_news_posts
        when :today
          raise Errors::NotImplemented
        end
      end

      # Reads the link of the big (main) post and the small post blocks.
      def fetch_news_posts
        posts_block = @page.css('.post-blocks-wrap')
        main_post_heading = posts_block.children.css('.post-big-block').children.css('h2')

        # The main post may be absent; a nil link is skipped by populate_urls.
        main_link = main_post_heading.children.css('a').first
        other_posts = posts_block.children.css('.post-small-blocks-wrap')

        populate_urls(main_link && main_link['href'], other_posts)
      end

      # Stores the main post link (when present) followed by the links of
      # the small post blocks.
      def populate_urls(main_post, other_posts)
        @urls << main_post if main_post

        @urls.concat(other_posts.children.map { |child| post_href(child) }.compact)
      end

      # Returns the `href` of the second child of the block's heading, or nil
      # when the block does not have that structure.
      def post_href(child)
        link = child.css('article').css('h2').children[1]
        link && link['href']
      rescue StandardError
        # Best effort, as before: an unexpected node must not abort the run.
        nil
      end

      # Downloads and parses the main page. `URI.open` is used because
      # `Kernel#open` no longer opens URLs since Ruby 3.0; the block form
      # closes the response once it is parsed.
      def download_html
        URI.open(MAIN_PAGE) { |io| Nokogiri::HTML(io) }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module PointmdComments
  module Errors
    # Raised when a requested feature (e.g. a source) is not implemented yet.
    class NotImplemented < StandardError
      # @param msg [String] message of the error; a bare `raise` gets a
      #   readable default instead of just the class name
      def initialize(msg = 'This feature is not implemented yet.')
        super
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module PointmdComments
  module Errors
    # Raised when the requested source is not one of the allowed sources.
    class UnknownSource < StandardError
      # The message is handed to StandardError (rather than shadowing
      # `message` with a reader) so that `to_s` and `full_message` show it too.
      #
      # @param msg [String, nil] custom message; defaults to a text listing
      #   the allowed sources
      def initialize(msg = nil)
        super(msg || "Unknown source specified. Allowed sources are: #{Aggregators::Posts::ALLOWED_SOURCES}")
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'optparse'
2
+ require 'pointmd_comments/version'
3
+
4
module PointmdComments
  # Parses the command-line options of the `pointmd_comments` executable.
  class OptParser
    # Source used when none is given on the command line.
    DEFAULT_SOURCE = :news

    attr_reader :options

    def initialize
      @options = {}
    end

    # Parses the arguments into the options hash.
    #
    # @param argv [Array<String>] arguments to parse; recognised options are
    #   removed from it (defaults to ARGV)
    # @return [Hash] may contain `:verbose`, `:source` (Symbol) and `:path` (String)
    # @raise [Errors::UnknownSource] when the source is not an allowed one
    def parse(argv = ARGV)
      build_parser.parse!(argv)

      # Only fill in the default; an explicitly given source is kept.
      options[:source] ||= DEFAULT_SOURCE
      validate_source

      options
    end

    private

    # Declares the supported options; each handler stores its value in `options`.
    # rubocop:disable Metrics/MethodLength
    def build_parser
      OptionParser.new do |opts|
        opts.banner = 'Usage: pointmd_comments [options]'

        opts.on('-v', '--verbose', 'Show logs and backtraces') do |v|
          options[:verbose] = v
        end

        opts.on(
          '-sSOURCE',
          '--source=SOURCE',
          "A source to pull links from. Available sources are: #{Aggregators::Posts::ALLOWED_SOURCES}"
        ) do |source|
          options[:source] = source.to_sym
        end

        # The path is kept as a String under :path (it is not a source).
        opts.on('-p PATH', '--path=PATH', 'A file path to pull links from.') do |path|
          options[:path] = path
        end

        opts.on('-V', '--version', 'Version') do
          puts PointmdComments::VERSION
          exit
        end
      end
    end
    # rubocop:enable Metrics/MethodLength

    def validate_source
      return if Aggregators::Posts::ALLOWED_SOURCES.include? options[:source]

      raise Errors::UnknownSource
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the pointmd_comments gem.
+ module PointmdComments
2
# Version string of the gem; printed by the -V/--version command-line option.
+ VERSION = '0.1.0'.freeze
3
+ end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pointmd_comments
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Nicolai Stoianov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-11-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pry
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubocop
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 1.1.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 1.1.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.10'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.10'
55
+ - !ruby/object:Gem::Dependency
56
+ name: watir
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: watir-webdriver
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: This gem lets you aggregate comments from point.md website into a csv
84
+ file.
85
+ email:
86
+ - stoianovnk@gmail.com
87
+ executables:
88
+ - pointmd_comments
89
+ extensions: []
90
+ extra_rdoc_files: []
91
+ files:
92
+ - bin/pointmd_comments
93
+ - lib/pointmd_comments.rb
94
+ - lib/pointmd_comments/aggregators/comments.rb
95
+ - lib/pointmd_comments/aggregators/main.rb
96
+ - lib/pointmd_comments/aggregators/posts.rb
97
+ - lib/pointmd_comments/errors/not_implemented.rb
98
+ - lib/pointmd_comments/errors/unknown_source.rb
99
+ - lib/pointmd_comments/opt_parser.rb
100
+ - lib/pointmd_comments/version.rb
101
+ homepage: https://github.com/ston1x/pointmd-comments
102
+ licenses:
103
+ - MIT
104
+ metadata:
105
+ allowed_push_host: https://rubygems.org
106
+ homepage_uri: https://github.com/ston1x/pointmd-comments
107
+ source_code_uri: https://github.com/ston1x/pointmd-comments
108
+ post_install_message:
109
+ rdoc_options: []
110
+ require_paths:
111
+ - lib
112
+ required_ruby_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '2.5'
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ requirements: []
123
+ rubygems_version: 3.1.2
124
+ signing_key:
125
+ specification_version: 4
126
+ summary: Point.md comments aggregator
127
+ test_files: []