telegram_web_proxy 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +15 -0
- data/.rubocop.yml +28 -0
- data/Gemfile +2 -0
- data/README.md +9 -3
- data/Rakefile +5 -3
- data/bin/console +4 -3
- data/lib/telegram_web_proxy.rb +5 -0
- data/lib/telegram_web_proxy/keyboard_generator.rb +21 -11
- data/lib/telegram_web_proxy/message_processor.rb +89 -40
- data/lib/telegram_web_proxy/page_fetcher.rb +5 -41
- data/lib/telegram_web_proxy/page_sanitizer.rb +93 -0
- data/lib/telegram_web_proxy/version.rb +3 -1
- data/telegram_web_proxy.gemspec +19 -14
- metadata +34 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 667a4890042a1e9320469234c49f099c0f7b17e7
|
4
|
+
data.tar.gz: 48aa4d83954e65cf57bd7fee48d661c63d0b5ac5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fac1a4cf2cadc198bf49d254f2ee130037bcc0e83b5c476fceb2bf731211e1fcf7467268e8ca0a61d2eafd24754a93fbab47426c1c826b76ab341e1d26403298
|
7
|
+
data.tar.gz: bf0c5c5fd16631ec22f04e6cbf26aa6273a3edccddda55b2bea496afd42289628882115779808636eb02955009ad23923b6d262d635f5c6db64ceb4fee303267
|
data/.gitlab-ci.yml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
image: ruby:2.4
|
2
|
+
|
3
|
+
before_script:
|
4
|
+
- ruby -v
|
5
|
+
- which ruby
|
6
|
+
- gem install bundler --no-ri --no-rdoc
|
7
|
+
- bundle install --jobs $(nproc) "${FLAGS[@]}"
|
8
|
+
|
9
|
+
rspec:
|
10
|
+
script:
|
11
|
+
- bundle exec rspec --format documentation --color
|
12
|
+
|
13
|
+
rubocop:
|
14
|
+
script:
|
15
|
+
- bundle exec rubocop
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require: rubocop-rspec
|
2
|
+
|
3
|
+
AllCops:
|
4
|
+
TargetRubyVersion: 2.4
|
5
|
+
|
6
|
+
Metrics/BlockLength:
|
7
|
+
Enabled: true
|
8
|
+
ExcludedMethods: ['new'] # for SomeClass = Struct.new(:param) do
|
9
|
+
Exclude:
|
10
|
+
- 'spec/**/*'
|
11
|
+
|
12
|
+
Style/AndOr:
|
13
|
+
EnforcedStyle: conditionals
|
14
|
+
|
15
|
+
Layout/DotPosition:
|
16
|
+
EnforcedStyle: trailing
|
17
|
+
|
18
|
+
Metrics/LineLength:
|
19
|
+
Max: 90
|
20
|
+
|
21
|
+
Layout/SpaceInsideHashLiteralBraces:
|
22
|
+
EnforcedStyle: no_space
|
23
|
+
|
24
|
+
Style/SingleLineMethods:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
RSpec/NestedGroups:
|
28
|
+
Max: 4
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -22,7 +22,14 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
-
|
25
|
+
Telegram::Bot::Client.run(token) do |bot|
|
26
|
+
processor = TelegramWebProxy.new(bot)
|
27
|
+
bot.listen do |message|
|
28
|
+
processor.process(message)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
Where `token` is the API token of your Telegram bot.
|
26
33
|
|
27
34
|
## Development
|
28
35
|
|
@@ -32,10 +39,9 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
32
39
|
|
33
40
|
## Contributing
|
34
41
|
|
35
|
-
Bug reports and pull requests are welcome on
|
42
|
+
Bug reports and pull requests are welcome on GitLab at https://gitlab.com/x86d0cent/telegram_web_proxy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
36
43
|
|
37
44
|
|
38
45
|
## License
|
39
46
|
|
40
47
|
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
41
|
-
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
require
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'telegram_web_proxy'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "telegram_web_proxy"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start(__FILE__)
|
data/lib/telegram_web_proxy.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'telegram/bot'
|
2
4
|
require 'uri'
|
3
5
|
require 'rest-client'
|
4
6
|
require 'sanitize'
|
5
7
|
|
8
|
+
# Web proxy for a Telegram bot: handles chats and processes messages
|
6
9
|
class TelegramWebProxy
|
7
10
|
attr_accessor :bot
|
8
11
|
|
@@ -21,6 +24,7 @@ class TelegramWebProxy
|
|
21
24
|
end
|
22
25
|
|
23
26
|
private
|
27
|
+
|
24
28
|
def get_chat(message)
|
25
29
|
message.respond_to?(:chat) ? message.chat : message.message.chat
|
26
30
|
end
|
@@ -28,5 +32,6 @@ end
|
|
28
32
|
|
29
33
|
require File.dirname(__FILE__) + '/telegram_web_proxy/message_processor'
|
30
34
|
require File.dirname(__FILE__) + '/telegram_web_proxy/page_fetcher'
|
35
|
+
require File.dirname(__FILE__) + '/telegram_web_proxy/page_sanitizer'
|
31
36
|
require File.dirname(__FILE__) + '/telegram_web_proxy/keyboard_generator'
|
32
37
|
require File.dirname(__FILE__) + '/telegram_web_proxy/version'
|
@@ -1,33 +1,42 @@
|
|
1
|
-
|
2
|
-
class TelegramWebProxy::KeyboardGenerator
|
3
|
-
MAX_BUTTONS = 8
|
1
|
+
# frozen_string_literal: true
|
4
2
|
|
3
|
+
TelegramWebProxy::KeyboardGenerator = Struct.new(:fragments_number, :current_fragment) do
|
4
|
+
MAX_BUTTONS = 8
|
5
5
|
|
6
6
|
# @return [Telegram::Bot::Types::InlineKeyboardMarkup]
|
7
7
|
def generate
|
8
|
-
@keyboard ||=
|
8
|
+
@keyboard ||=
|
9
|
+
Telegram::Bot::Types::InlineKeyboardMarkup.new(inline_keyboard: build_keyboard)
|
9
10
|
end
|
10
11
|
|
11
12
|
private
|
12
13
|
|
13
14
|
def build_keyboard
|
14
|
-
[page_numbers.map{|i| new_button(i)}]
|
15
|
+
[page_numbers.map { |i| new_button(i) }]
|
15
16
|
end
|
16
17
|
|
17
18
|
def page_numbers
|
18
19
|
if fragments_number <= MAX_BUTTONS
|
19
20
|
full_visible
|
20
|
-
elsif
|
21
|
+
elsif current_fragment_in_the_beginning?
|
21
22
|
beginning_visible
|
22
|
-
elsif
|
23
|
+
elsif current_fragment_in_the_ending?
|
23
24
|
ending_visible
|
24
25
|
else
|
25
26
|
middle_visible
|
26
27
|
end
|
27
28
|
end
|
28
29
|
|
30
|
+
def current_fragment_in_the_beginning?
|
31
|
+
current_fragment <= MAX_BUTTONS / 2 - 1
|
32
|
+
end
|
33
|
+
|
34
|
+
def current_fragment_in_the_ending?
|
35
|
+
(fragments_number - current_fragment) <= MAX_BUTTONS / 2
|
36
|
+
end
|
37
|
+
|
29
38
|
def full_visible
|
30
|
-
0
|
39
|
+
0..fragments_number - 1
|
31
40
|
end
|
32
41
|
|
33
42
|
def beginning_visible
|
@@ -36,14 +45,15 @@ class TelegramWebProxy::KeyboardGenerator
|
|
36
45
|
|
37
46
|
def middle_visible
|
38
47
|
range = MAX_BUTTONS / 4
|
39
|
-
[0] +
|
48
|
+
[0] +
|
49
|
+
(current_fragment - range..current_fragment + range).to_a +
|
50
|
+
[fragments_number - 1]
|
40
51
|
end
|
41
52
|
|
42
53
|
def ending_visible
|
43
|
-
[0] + (fragments_number - MAX_BUTTONS + 2
|
54
|
+
[0] + (fragments_number - MAX_BUTTONS + 2..fragments_number - 1).to_a
|
44
55
|
end
|
45
56
|
|
46
|
-
private
|
47
57
|
def new_button(idx)
|
48
58
|
Telegram::Bot::Types::InlineKeyboardButton.new(text: button_text(idx),
|
49
59
|
callback_data: "/f #{idx}")
|
@@ -1,6 +1,21 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
TelegramWebProxy::MessageProcessor = Struct.new(:bot, :chat) do
|
4
|
+
MAX_FRAGMENT_SIZE = 4096
|
5
|
+
WEBSEARCH_URL = 'https://duckduckgo.com/lite'
|
6
|
+
HELP_TEXT =
|
7
|
+
<<~MSG
|
8
|
+
Supported requests:
|
9
|
+
/h - help
|
10
|
+
/o URL (or just URL without /o) - open URL
|
11
|
+
/s REQUEST - search with DuckDuckGo
|
12
|
+
/e - echo
|
13
|
+
/t - just a test
|
14
|
+
MSG
|
15
|
+
# /f N (send fragment N) is accepted too; it is what the inline keyboard buttons send
|
16
|
+
|
17
|
+
TEST_TEXT = "A ! <a href='tg:msg?text=test&to=wprox_bot'> click here</a> <a href='https://techcrunch.com/2017/07/14/startup-battlefield-application-deadline-extended-for-disrupt-sf-2/'>TC instant view</a> <a href='http://reddit.com'>Reddit</a>." # rubocop:disable Metrics/LineLength
|
2
18
|
|
3
|
-
class TelegramWebProxy::MessageProcessor
|
4
19
|
attr_accessor :message, :page
|
5
20
|
|
6
21
|
def process(message)
|
@@ -20,37 +35,53 @@ class TelegramWebProxy::MessageProcessor
|
|
20
35
|
end
|
21
36
|
end
|
22
37
|
|
23
|
-
def process_message
|
38
|
+
def process_message # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
|
24
39
|
command, args = message.text.split(' ', 2)
|
25
40
|
case command
|
26
41
|
when '/h'
|
27
|
-
send_text(text:
|
42
|
+
send_text(text: HELP_TEXT)
|
28
43
|
when '/o'
|
29
|
-
|
30
|
-
send_large_html(TelegramWebProxy::PageFetcher.new(args).get)
|
44
|
+
open_page(args)
|
31
45
|
when '/s'
|
32
|
-
|
33
|
-
send_large_html(TelegramWebProxy::PageFetcher.new('https://duckduckgo.com/lite').post(q: args))
|
46
|
+
web_search(args)
|
34
47
|
when '/e'
|
35
|
-
# puts "Echo: #{args}"
|
36
48
|
send_text(text: args)
|
37
|
-
when '/f' #actually user shouldn't use this manually, see #process_callback
|
49
|
+
when '/f' # actually user shouldn't use this manually, see #process_callback
|
38
50
|
send_fragment(args.to_i)
|
39
51
|
when '/'
|
40
52
|
puts 'Do nothing'
|
41
53
|
when '/t'
|
42
|
-
text
|
43
|
-
send_text(text: text)#, parse_mode: 'HTML', reply_markup: markup)
|
54
|
+
send_text(text: TEST_TEXT)
|
44
55
|
else
|
45
|
-
|
46
|
-
puts "Trying to open page: #{command}"
|
47
|
-
send_large_html(TelegramWebProxy::PageFetcher.new(command).get)
|
48
|
-
end
|
56
|
+
process_unknown_command(command)
|
49
57
|
end
|
50
58
|
end
|
51
59
|
|
52
60
|
private
|
53
61
|
|
62
|
+
def looks_like_url?(str)
|
63
|
+
str =~ URI.regexp ||
|
64
|
+
str =~ /\A[\w-]+\.[\w-]+(\.[\w-]+)*\z/
|
65
|
+
end
|
66
|
+
|
67
|
+
# FIXME: check whether the given command looks like a URL
|
68
|
+
def process_unknown_command(command)
|
69
|
+
unless looks_like_url?(command)
|
70
|
+
send_text(text: 'Not a command or URL')
|
71
|
+
else
|
72
|
+
puts "Trying to open page: #{command}"
|
73
|
+
send_object(page_fetcher(command).get)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def open_page(args)
|
78
|
+
send_object(page_fetcher(args).get)
|
79
|
+
end
|
80
|
+
|
81
|
+
def web_search(args)
|
82
|
+
send_object(page_fetcher(WEBSEARCH_URL).post(q: args))
|
83
|
+
end
|
84
|
+
|
54
85
|
def send_text(text:, reply_markup: nil, parse_mode: 'HTML')
|
55
86
|
bot.api.send_message(chat_id: chat.id,
|
56
87
|
text: text,
|
@@ -64,39 +95,57 @@ class TelegramWebProxy::MessageProcessor
|
|
64
95
|
# @param fragment [Integer] the number of the fragment to send
|
65
96
|
#
|
66
97
|
def send_fragment(fragment)
|
67
|
-
keyboard_markup =
|
98
|
+
keyboard_markup =
|
99
|
+
TelegramWebProxy::KeyboardGenerator.new(page.size, fragment).generate
|
68
100
|
begin
|
69
101
|
send_text(text: page[fragment], reply_markup: keyboard_markup)
|
70
|
-
rescue
|
71
|
-
|
72
|
-
text = "OOPS! Something wrong with fragment #{fragment + 1}. Here is a previous fragment: \n" + page[fragment - 1]
|
73
|
-
send_text(text: text, reply_markup: keyboard_markup)
|
74
|
-
else
|
75
|
-
send_text(text: "Error while sending fragment #{fragment + 1}")
|
76
|
-
end
|
102
|
+
rescue Telegram::Bot::Exceptions::ResponseError => e
|
103
|
+
handle_response_error(e, fragment, keyboard_markup)
|
77
104
|
end
|
78
105
|
end
|
79
106
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
while
|
88
|
-
end_pos -= 1
|
89
|
-
end
|
90
|
-
result << page[start_pos..end_pos] if start_pos < end_pos
|
91
|
-
start_pos = end_pos + 1
|
107
|
+
def handle_response_error(e, fragment, keyboard_markup)
|
108
|
+
if e.error_code.to_s == '400'
|
109
|
+
send_text(
|
110
|
+
text: "Error with fragment #{fragment + 1}. Previous one:\n" + page[fragment - 1],
|
111
|
+
reply_markup: keyboard_markup
|
112
|
+
)
|
113
|
+
else
|
114
|
+
send_text(text: "Error while sending fragment #{fragment + 1}")
|
92
115
|
end
|
93
|
-
result
|
94
116
|
end
|
95
117
|
|
118
|
+
def split_into_parts(page)
|
119
|
+
return [] if page.empty?
|
120
|
+
size = [page.size, MAX_FRAGMENT_SIZE].min
|
121
|
+
size = page[0, size].rindex('<') until valid_segment?(page[0, size])
|
122
|
+
raise "INVALID FRAGMENT near '#{page[0..500]}...'" unless size.positive?
|
123
|
+
[page[0, size], *split_into_parts(page[size..-1])]
|
124
|
+
end
|
125
|
+
|
126
|
+
# Checks if the segment of the page is valid and might be taken as a separate fragment
|
127
|
+
def valid_segment?(page_segment)
|
128
|
+
Nokogiri::XML("<e>#{page_segment}</e>").errors.empty?
|
129
|
+
end
|
130
|
+
|
131
|
+
def send_object(rest_response)
|
132
|
+
case rest_response.headers[:content_type].split('/')[0]
|
133
|
+
when 'text'
|
134
|
+
split_and_send(TelegramWebProxy::PageSanitizer.new(rest_response).sanitize)
|
135
|
+
when 'image'
|
136
|
+
send_text(text: '^')
|
137
|
+
else
|
138
|
+
send_text(text: 'Only text or image content is supported')
|
139
|
+
end
|
140
|
+
end
|
96
141
|
|
97
|
-
# Splits
|
98
|
-
def
|
99
|
-
self.page = split_into_parts(
|
142
|
+
# Splits fetched HTML and sends the first fragment
|
143
|
+
def split_and_send(html_doc)
|
144
|
+
self.page = split_into_parts(html_doc)
|
100
145
|
send_fragment(0)
|
101
146
|
end
|
147
|
+
|
148
|
+
def page_fetcher(url)
|
149
|
+
TelegramWebProxy::PageFetcher.new(url)
|
150
|
+
end
|
102
151
|
end
|
@@ -1,50 +1,14 @@
|
|
1
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
|
4
|
+
TelegramWebProxy::PageFetcher = Struct.new(:url) do
|
4
5
|
HTTP_USER_AGENT = 'ELinks/0.13.GIT (textmode; Linux 2.6.29 i686; 119x51-2)'
|
5
6
|
|
6
7
|
def get
|
7
|
-
|
8
|
-
fix_relative_urls(sanitize_page(result.body), url)
|
8
|
+
RestClient.get(url, user_agent: HTTP_USER_AGENT)
|
9
9
|
end
|
10
10
|
|
11
11
|
def post(data)
|
12
|
-
|
13
|
-
fix_relative_urls(sanitize_page(result.body), url)
|
12
|
+
RestClient.post(url, data, user_agent: HTTP_USER_AGENT)
|
14
13
|
end
|
15
|
-
|
16
|
-
private
|
17
|
-
# Takes out most of HTML tags and unnecesary whitespaces
|
18
|
-
def sanitize_page(body)
|
19
|
-
Sanitize.fragment(body,
|
20
|
-
# these tags are allowed for Teregram, however,
|
21
|
-
# nested tags are not allowed.
|
22
|
-
# Would be great to resolve this issue later.
|
23
|
-
# elements: ['b', 'strong', 'i', 'em', 'a', 'code', 'pre'],
|
24
|
-
elements: ['a'],
|
25
|
-
attributes: {'a' => ['href']},
|
26
|
-
remove_contents: [:script, :style, :img],
|
27
|
-
whitespace_elements: {
|
28
|
-
'br' => {before: "\n", after: ""},
|
29
|
-
'div' => {before: "\n", after: "\n"},
|
30
|
-
'p' => {before: "\n", after: "\n"}
|
31
|
-
}).
|
32
|
-
gsub(/\t+/, " ").
|
33
|
-
gsub(/ +/, " ").
|
34
|
-
gsub(/\n+/, "\n").
|
35
|
-
gsub(/( \n)+/, " \n").
|
36
|
-
gsub("\n \n", "\n")
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
# Replaces relative urls to absolute.
|
41
|
-
#
|
42
|
-
# @param text [String] original text
|
43
|
-
# @param url [String] url from where the page was fetched
|
44
|
-
# @return [String]
|
45
|
-
def fix_relative_urls(text, url)
|
46
|
-
uri = URI.parse(url)
|
47
|
-
text.gsub('ref="/', "ref=\"#{uri.scheme || 'http'}://#{uri.host || uri.path}/")
|
48
|
-
end
|
49
|
-
|
50
14
|
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
TelegramWebProxy::PageSanitizer = Struct.new(:response) do
|
5
|
+
WHITESPACE_ELEMENTS = {'br' => {before: "\n", after: ''},
|
6
|
+
'div' => {before: "\n", after: "\n"},
|
7
|
+
'p' => {before: "\n", after: "\n"}}.freeze
|
8
|
+
|
9
|
+
def sanitize
|
10
|
+
fix_relative_urls(prepare_page(response.body), response.request.url)
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
# Strips most HTML tags and unnecessary whitespace
|
16
|
+
def prepare_page(body)
|
17
|
+
remove_whitespaces(transform_img(remove_nested_tags(sanitize_tags(body))))
|
18
|
+
end
|
19
|
+
|
20
|
+
def image_description(attrs)
|
21
|
+
CGI.escapeHTML((attrs['alt'] || attrs['title'] || 'img').to_s[0..20])
|
22
|
+
end
|
23
|
+
|
24
|
+
def image_replacement(attrs)
|
25
|
+
return '' if attrs['width'].to_s == '1' && attrs['height'].to_s == '1'
|
26
|
+
"<a href=\"#{CGI.escapeHTML(attrs['src'])}\">〖#{image_description(attrs)}〗</a>"
|
27
|
+
end
|
28
|
+
|
29
|
+
# Replaces <img src=... alt=...> with <a href=img_src>img_alt</a>
|
30
|
+
def transform_img(text)
|
31
|
+
replacements = text.scan(/<\\?img [^>]*>/).map do |img_tag|
|
32
|
+
attrs = Nokogiri::HTML.fragment(img_tag).children[0].attributes
|
33
|
+
[img_tag, image_replacement(attrs)]
|
34
|
+
end
|
35
|
+
replacements.inject(text) { |str, tags| str.gsub(tags[0], tags[1]) }
|
36
|
+
end
|
37
|
+
|
38
|
+
def remove_nested_tags(text)
|
39
|
+
tree = Nokogiri::HTML.fragment(text)
|
40
|
+
[process_tree(tree)].flatten.map(&:to_s).join
|
41
|
+
end
|
42
|
+
|
43
|
+
def no_nested_tags?(tree, tag_name)
|
44
|
+
tree.children.none? { |c| c.enum_for(:traverse).map(&:name).include?(tag_name) }
|
45
|
+
end
|
46
|
+
|
47
|
+
# Makes the tree flat.
|
48
|
+
# It keeps the top-level tag and removes all nested tags unless it has a nested <a> tag
|
49
|
+
# (otherwise it keeps the <a> tag and removes all wrapping tags)
|
50
|
+
def process_tree(tree)
|
51
|
+
return tree if tree.instance_of? Nokogiri::XML::Text
|
52
|
+
if (tree.name == 'a' || no_nested_tags?(tree, 'a')) &&
|
53
|
+
!tree.instance_of?(Nokogiri::HTML::DocumentFragment)
|
54
|
+
text = tree.xpath('.//text()').to_s
|
55
|
+
tree.children.map(&:remove)
|
56
|
+
tree.add_child text
|
57
|
+
tree
|
58
|
+
else
|
59
|
+
tree.children.map { |child| process_tree(child) }
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def sanitize_tags(body)
|
64
|
+
Sanitize.fragment(
|
65
|
+
body,
|
66
|
+
elements: %w[b strong i em a code pre img],
|
67
|
+
attributes: {'a' => ['href'], 'img' => %w[src alt title width height]},
|
68
|
+
remove_contents: %i[script style img],
|
69
|
+
whitespace_elements: WHITESPACE_ELEMENTS
|
70
|
+
)
|
71
|
+
end
|
72
|
+
|
73
|
+
def remove_whitespaces(text)
|
74
|
+
# FIXME: also remove the line breaks from "\n</tag>\n" and "\n<tag>\n"
|
75
|
+
text.
|
76
|
+
gsub(/\t+/, ' ').
|
77
|
+
gsub(/ +/, ' ').
|
78
|
+
gsub(/( \n)+/, "\n").
|
79
|
+
gsub(/\n+/, "\n").
|
80
|
+
strip
|
81
|
+
end
|
82
|
+
|
83
|
+
# Replaces relative URLs with absolute ones.
|
84
|
+
#
|
85
|
+
# @param text [String] original text
|
86
|
+
# @param url [String] url from where the page was fetched
|
87
|
+
# @return [String]
|
88
|
+
def fix_relative_urls(text, url)
|
89
|
+
uri = URI.parse(url)
|
90
|
+
text.gsub('ref="//', "ref=\"#{uri.scheme || 'http'}://").
|
91
|
+
gsub('ref="/', "ref=\"#{uri.scheme || 'http'}://#{uri.host || uri.path}/")
|
92
|
+
end
|
93
|
+
end
|
data/telegram_web_proxy.gemspec
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
lib = File.expand_path('../lib', __FILE__)
|
3
5
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
6
|
require 'telegram_web_proxy/version'
|
5
7
|
|
6
8
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
9
|
+
spec.name = 'telegram_web_proxy'
|
8
10
|
spec.version = TelegramWebProxy::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
11
|
+
spec.authors = ['Vladimir Maslov']
|
12
|
+
spec.email = ['x86d0cent@gmail.com']
|
11
13
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
14
|
+
spec.summary = 'Web proxy for Telegram'
|
15
|
+
spec.description = 'A simple web proxy bot for Telegram'
|
16
|
+
spec.homepage = 'https://gitlab.com/x86d0cent/telegram_web_proxy'
|
17
|
+
spec.license = 'MIT'
|
16
18
|
|
17
|
-
# Prevent pushing this gem to RubyGems.org.
|
19
|
+
# Prevent pushing this gem to RubyGems.org.
|
20
|
+
# To allow pushes either set the 'allowed_push_host'
|
18
21
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
22
|
# if spec.respond_to?(:metadata)
|
20
23
|
# spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
|
@@ -26,16 +29,18 @@ Gem::Specification.new do |spec|
|
|
26
29
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
27
30
|
f.match(%r{^(test|spec|features)/})
|
28
31
|
end
|
29
|
-
spec.bindir =
|
32
|
+
spec.bindir = 'exe'
|
30
33
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
31
|
-
spec.require_paths = [
|
34
|
+
spec.require_paths = ['lib']
|
35
|
+
spec.required_ruby_version = '>= 2.4.0'
|
32
36
|
|
33
|
-
spec.add_development_dependency
|
34
|
-
spec.add_development_dependency
|
35
|
-
spec.add_development_dependency
|
37
|
+
spec.add_development_dependency 'bundler', '~> 1.14'
|
38
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
39
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
40
|
+
spec.add_development_dependency 'rubocop'
|
41
|
+
spec.add_development_dependency 'rubocop-rspec'
|
36
42
|
|
37
43
|
spec.add_dependency 'rest-client', '~> 2.0'
|
38
44
|
spec.add_dependency 'sanitize', '~> 4.5'
|
39
45
|
spec.add_dependency 'telegram-bot-ruby', '~> 0.8.0'
|
40
|
-
|
41
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: telegram_web_proxy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vladimir Maslov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,34 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubocop-rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: rest-client
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -102,7 +130,9 @@ extensions: []
|
|
102
130
|
extra_rdoc_files: []
|
103
131
|
files:
|
104
132
|
- ".gitignore"
|
133
|
+
- ".gitlab-ci.yml"
|
105
134
|
- ".rspec"
|
135
|
+
- ".rubocop.yml"
|
106
136
|
- ".travis.yml"
|
107
137
|
- CODE_OF_CONDUCT.md
|
108
138
|
- Gemfile
|
@@ -115,6 +145,7 @@ files:
|
|
115
145
|
- lib/telegram_web_proxy/keyboard_generator.rb
|
116
146
|
- lib/telegram_web_proxy/message_processor.rb
|
117
147
|
- lib/telegram_web_proxy/page_fetcher.rb
|
148
|
+
- lib/telegram_web_proxy/page_sanitizer.rb
|
118
149
|
- lib/telegram_web_proxy/version.rb
|
119
150
|
- telegram_web_proxy.gemspec
|
120
151
|
homepage: https://gitlab.com/x86d0cent/telegram_web_proxy
|
@@ -129,7 +160,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
160
|
requirements:
|
130
161
|
- - ">="
|
131
162
|
- !ruby/object:Gem::Version
|
132
|
-
version:
|
163
|
+
version: 2.4.0
|
133
164
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
165
|
requirements:
|
135
166
|
- - ">="
|