telegram_web_proxy 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +15 -0
- data/.rubocop.yml +28 -0
- data/Gemfile +2 -0
- data/README.md +9 -3
- data/Rakefile +5 -3
- data/bin/console +4 -3
- data/lib/telegram_web_proxy.rb +5 -0
- data/lib/telegram_web_proxy/keyboard_generator.rb +21 -11
- data/lib/telegram_web_proxy/message_processor.rb +89 -40
- data/lib/telegram_web_proxy/page_fetcher.rb +5 -41
- data/lib/telegram_web_proxy/page_sanitizer.rb +93 -0
- data/lib/telegram_web_proxy/version.rb +3 -1
- data/telegram_web_proxy.gemspec +19 -14
- metadata +34 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 667a4890042a1e9320469234c49f099c0f7b17e7
|
4
|
+
data.tar.gz: 48aa4d83954e65cf57bd7fee48d661c63d0b5ac5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fac1a4cf2cadc198bf49d254f2ee130037bcc0e83b5c476fceb2bf731211e1fcf7467268e8ca0a61d2eafd24754a93fbab47426c1c826b76ab341e1d26403298
|
7
|
+
data.tar.gz: bf0c5c5fd16631ec22f04e6cbf26aa6273a3edccddda55b2bea496afd42289628882115779808636eb02955009ad23923b6d262d635f5c6db64ceb4fee303267
|
data/.gitlab-ci.yml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
image: ruby:2.4
|
2
|
+
|
3
|
+
before_script:
|
4
|
+
- ruby -v
|
5
|
+
- which ruby
|
6
|
+
- gem install bundler --no-ri --no-rdoc
|
7
|
+
- bundle install --jobs $(nproc) "${FLAGS[@]}"
|
8
|
+
|
9
|
+
rspec:
|
10
|
+
script:
|
11
|
+
- bundle exec rspec --format documentation --color
|
12
|
+
|
13
|
+
rubocop:
|
14
|
+
script:
|
15
|
+
- bundle exec rubocop
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require: rubocop-rspec
|
2
|
+
|
3
|
+
AllCops:
|
4
|
+
TargetRubyVersion: 2.4
|
5
|
+
|
6
|
+
Metrics/BlockLength:
|
7
|
+
Enabled: true
|
8
|
+
ExcludedMethods: ['new'] # for SomeClass = Struct.new(:param) do
|
9
|
+
Exclude:
|
10
|
+
- 'spec/**/*'
|
11
|
+
|
12
|
+
Style/AndOr:
|
13
|
+
EnforcedStyle: conditionals
|
14
|
+
|
15
|
+
Layout/DotPosition:
|
16
|
+
EnforcedStyle: trailing
|
17
|
+
|
18
|
+
Metrics/LineLength:
|
19
|
+
Max: 90
|
20
|
+
|
21
|
+
Layout/SpaceInsideHashLiteralBraces:
|
22
|
+
EnforcedStyle: no_space
|
23
|
+
|
24
|
+
Style/SingleLineMethods:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
RSpec/NestedGroups:
|
28
|
+
Max: 4
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -22,7 +22,14 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
-
|
25
|
+
Telegram::Bot::Client.run(token) do |bot|
|
26
|
+
processor = TelegramWebProxy.new(bot)
|
27
|
+
bot.listen do |message|
|
28
|
+
processor.process(message)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
Where `token` is your Telegram bot token.
|
26
33
|
|
27
34
|
## Development
|
28
35
|
|
@@ -32,10 +39,9 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
32
39
|
|
33
40
|
## Contributing
|
34
41
|
|
35
|
-
Bug reports and pull requests are welcome on
|
42
|
+
Bug reports and pull requests are welcome on GitLab at https://gitlab.com/x86d0cent/telegram_web_proxy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
36
43
|
|
37
44
|
|
38
45
|
## License
|
39
46
|
|
40
47
|
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
41
|
-
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
require
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'telegram_web_proxy'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "telegram_web_proxy"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start(__FILE__)
|
data/lib/telegram_web_proxy.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'telegram/bot'
|
2
4
|
require 'uri'
|
3
5
|
require 'rest-client'
|
4
6
|
require 'sanitize'
|
5
7
|
|
8
|
+
# Web proxy for a Telegram bot which handles chats and processes messages
|
6
9
|
class TelegramWebProxy
|
7
10
|
attr_accessor :bot
|
8
11
|
|
@@ -21,6 +24,7 @@ class TelegramWebProxy
|
|
21
24
|
end
|
22
25
|
|
23
26
|
private
|
27
|
+
|
24
28
|
def get_chat(message)
|
25
29
|
message.respond_to?(:chat) ? message.chat : message.message.chat
|
26
30
|
end
|
@@ -28,5 +32,6 @@ end
|
|
28
32
|
|
29
33
|
require File.dirname(__FILE__) + '/telegram_web_proxy/message_processor'
|
30
34
|
require File.dirname(__FILE__) + '/telegram_web_proxy/page_fetcher'
|
35
|
+
require File.dirname(__FILE__) + '/telegram_web_proxy/page_sanitizer'
|
31
36
|
require File.dirname(__FILE__) + '/telegram_web_proxy/keyboard_generator'
|
32
37
|
require File.dirname(__FILE__) + '/telegram_web_proxy/version'
|
@@ -1,33 +1,42 @@
|
|
1
|
-
|
2
|
-
class TelegramWebProxy::KeyboardGenerator
|
3
|
-
MAX_BUTTONS = 8
|
1
|
+
# frozen_string_literal: true
|
4
2
|
|
3
|
+
TelegramWebProxy::KeyboardGenerator = Struct.new(:fragments_number, :current_fragment) do
|
4
|
+
MAX_BUTTONS = 8
|
5
5
|
|
6
6
|
# @return [Telegram::Bot::Types::InlineKeyboardMarkup]
|
7
7
|
def generate
|
8
|
-
@keyboard ||=
|
8
|
+
@keyboard ||=
|
9
|
+
Telegram::Bot::Types::InlineKeyboardMarkup.new(inline_keyboard: build_keyboard)
|
9
10
|
end
|
10
11
|
|
11
12
|
private
|
12
13
|
|
13
14
|
def build_keyboard
|
14
|
-
[page_numbers.map{|i| new_button(i)}]
|
15
|
+
[page_numbers.map { |i| new_button(i) }]
|
15
16
|
end
|
16
17
|
|
17
18
|
def page_numbers
|
18
19
|
if fragments_number <= MAX_BUTTONS
|
19
20
|
full_visible
|
20
|
-
elsif
|
21
|
+
elsif current_fragment_in_the_beginning?
|
21
22
|
beginning_visible
|
22
|
-
elsif
|
23
|
+
elsif current_fragment_in_the_ending?
|
23
24
|
ending_visible
|
24
25
|
else
|
25
26
|
middle_visible
|
26
27
|
end
|
27
28
|
end
|
28
29
|
|
30
|
+
def current_fragment_in_the_beginning?
|
31
|
+
current_fragment <= MAX_BUTTONS / 2 - 1
|
32
|
+
end
|
33
|
+
|
34
|
+
def current_fragment_in_the_ending?
|
35
|
+
(fragments_number - current_fragment) <= MAX_BUTTONS / 2
|
36
|
+
end
|
37
|
+
|
29
38
|
def full_visible
|
30
|
-
0
|
39
|
+
0..fragments_number - 1
|
31
40
|
end
|
32
41
|
|
33
42
|
def beginning_visible
|
@@ -36,14 +45,15 @@ class TelegramWebProxy::KeyboardGenerator
|
|
36
45
|
|
37
46
|
def middle_visible
|
38
47
|
range = MAX_BUTTONS / 4
|
39
|
-
[0] +
|
48
|
+
[0] +
|
49
|
+
(current_fragment - range..current_fragment + range).to_a +
|
50
|
+
[fragments_number - 1]
|
40
51
|
end
|
41
52
|
|
42
53
|
def ending_visible
|
43
|
-
[0] + (fragments_number - MAX_BUTTONS + 2
|
54
|
+
[0] + (fragments_number - MAX_BUTTONS + 2..fragments_number - 1).to_a
|
44
55
|
end
|
45
56
|
|
46
|
-
private
|
47
57
|
def new_button(idx)
|
48
58
|
Telegram::Bot::Types::InlineKeyboardButton.new(text: button_text(idx),
|
49
59
|
callback_data: "/f #{idx}")
|
@@ -1,6 +1,21 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
TelegramWebProxy::MessageProcessor = Struct.new(:bot, :chat) do
|
4
|
+
MAX_FRAGMENT_SIZE = 4096
|
5
|
+
WEBSEARCH_URL = 'https://duckduckgo.com/lite'
|
6
|
+
HELP_TEXT =
|
7
|
+
<<~MSG
|
8
|
+
Supported requests:
|
9
|
+
/h - help
|
10
|
+
/o URL (or just URL without /o) - open URL
|
11
|
+
/s REQUEST - search with DuckDuckGo
|
12
|
+
/e - echo
|
13
|
+
/t - just a test
|
14
|
+
MSG
|
15
|
+
# /f is possible too
|
16
|
+
|
17
|
+
TEST_TEXT = "A ! <a href='tg:msg?text=test&to=wprox_bot'> click here</a> <a href='https://techcrunch.com/2017/07/14/startup-battlefield-application-deadline-extended-for-disrupt-sf-2/'>TC instant view</a> <a href='http://reddit.com'>Reddit</a>." # rubocop:disable Metrics/LineLength
|
2
18
|
|
3
|
-
class TelegramWebProxy::MessageProcessor
|
4
19
|
attr_accessor :message, :page
|
5
20
|
|
6
21
|
def process(message)
|
@@ -20,37 +35,53 @@ class TelegramWebProxy::MessageProcessor
|
|
20
35
|
end
|
21
36
|
end
|
22
37
|
|
23
|
-
def process_message
|
38
|
+
def process_message # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
|
24
39
|
command, args = message.text.split(' ', 2)
|
25
40
|
case command
|
26
41
|
when '/h'
|
27
|
-
send_text(text:
|
42
|
+
send_text(text: HELP_TEXT)
|
28
43
|
when '/o'
|
29
|
-
|
30
|
-
send_large_html(TelegramWebProxy::PageFetcher.new(args).get)
|
44
|
+
open_page(args)
|
31
45
|
when '/s'
|
32
|
-
|
33
|
-
send_large_html(TelegramWebProxy::PageFetcher.new('https://duckduckgo.com/lite').post(q: args))
|
46
|
+
web_search(args)
|
34
47
|
when '/e'
|
35
|
-
# puts "Echo: #{args}"
|
36
48
|
send_text(text: args)
|
37
|
-
when '/f' #actually user shouldn't use this manually, see #process_callback
|
49
|
+
when '/f' # actually user shouldn't use this manually, see #process_callback
|
38
50
|
send_fragment(args.to_i)
|
39
51
|
when '/'
|
40
52
|
puts 'Do nothing'
|
41
53
|
when '/t'
|
42
|
-
text
|
43
|
-
send_text(text: text)#, parse_mode: 'HTML', reply_markup: markup)
|
54
|
+
send_text(text: TEST_TEXT)
|
44
55
|
else
|
45
|
-
|
46
|
-
puts "Trying to open page: #{command}"
|
47
|
-
send_large_html(TelegramWebProxy::PageFetcher.new(command).get)
|
48
|
-
end
|
56
|
+
process_unknown_command(command)
|
49
57
|
end
|
50
58
|
end
|
51
59
|
|
52
60
|
private
|
53
61
|
|
62
|
+
def looks_like_url?(str)
|
63
|
+
str =~ URI.regexp ||
|
64
|
+
str =~ /\A[\w-]+\.[\w-]+(\.[\w-]+)*\z/
|
65
|
+
end
|
66
|
+
|
67
|
+
# FIXME: check if the given command looks like a URL
|
68
|
+
def process_unknown_command(command)
|
69
|
+
unless looks_like_url?(command)
|
70
|
+
send_text(text: 'Not a command or URL')
|
71
|
+
else
|
72
|
+
puts "Trying to open page: #{command}"
|
73
|
+
send_object(page_fetcher(command).get)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def open_page(args)
|
78
|
+
send_object(page_fetcher(args).get)
|
79
|
+
end
|
80
|
+
|
81
|
+
def web_search(args)
|
82
|
+
send_object(page_fetcher(WEBSEARCH_URL).post(q: args))
|
83
|
+
end
|
84
|
+
|
54
85
|
def send_text(text:, reply_markup: nil, parse_mode: 'HTML')
|
55
86
|
bot.api.send_message(chat_id: chat.id,
|
56
87
|
text: text,
|
@@ -64,39 +95,57 @@ class TelegramWebProxy::MessageProcessor
|
|
64
95
|
# @param fragment [Integer] the number of the fragment to send
|
65
96
|
#
|
66
97
|
def send_fragment(fragment)
|
67
|
-
keyboard_markup =
|
98
|
+
keyboard_markup =
|
99
|
+
TelegramWebProxy::KeyboardGenerator.new(page.size, fragment).generate
|
68
100
|
begin
|
69
101
|
send_text(text: page[fragment], reply_markup: keyboard_markup)
|
70
|
-
rescue
|
71
|
-
|
72
|
-
text = "OOPS! Something wrong with fragment #{fragment + 1}. Here is a previous fragment: \n" + page[fragment - 1]
|
73
|
-
send_text(text: text, reply_markup: keyboard_markup)
|
74
|
-
else
|
75
|
-
send_text(text: "Error while sending fragment #{fragment + 1}")
|
76
|
-
end
|
102
|
+
rescue Telegram::Bot::Exceptions::ResponseError => e
|
103
|
+
handle_response_error(e, fragment, keyboard_markup)
|
77
104
|
end
|
78
105
|
end
|
79
106
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
while
|
88
|
-
end_pos -= 1
|
89
|
-
end
|
90
|
-
result << page[start_pos..end_pos] if start_pos < end_pos
|
91
|
-
start_pos = end_pos + 1
|
107
|
+
def handle_response_error(e, fragment, keyboard_markup)
|
108
|
+
if e.error_code.to_s == '400'
|
109
|
+
send_text(
|
110
|
+
text: "Error with fragment #{fragment + 1}. Previous one:\n" + page[fragment - 1],
|
111
|
+
reply_markup: keyboard_markup
|
112
|
+
)
|
113
|
+
else
|
114
|
+
send_text(text: "Error while sending fragment #{fragment + 1}")
|
92
115
|
end
|
93
|
-
result
|
94
116
|
end
|
95
117
|
|
118
|
+
def split_into_parts(page)
|
119
|
+
return [] if page.empty?
|
120
|
+
size = [page.size, MAX_FRAGMENT_SIZE].min
|
121
|
+
size = page[0, size].rindex('<') until valid_segment?(page[0, size])
|
122
|
+
raise "INVALID FRAGMENT near '#{page[0..500]}...'" unless size.positive?
|
123
|
+
[page[0, size], *split_into_parts(page[size..-1])]
|
124
|
+
end
|
125
|
+
|
126
|
+
# Checks whether the segment of the page is valid and can be used as a separate fragment
|
127
|
+
def valid_segment?(page_segment)
|
128
|
+
Nokogiri::XML("<e>#{page_segment}</e>").errors.empty?
|
129
|
+
end
|
130
|
+
|
131
|
+
def send_object(rest_response)
|
132
|
+
case rest_response.headers[:content_type].split('/')[0]
|
133
|
+
when 'text'
|
134
|
+
split_and_send(TelegramWebProxy::PageSanitizer.new(rest_response).sanitize)
|
135
|
+
when 'image'
|
136
|
+
send_text(text: '^')
|
137
|
+
else
|
138
|
+
send_text(text: 'Only text or image content is supported')
|
139
|
+
end
|
140
|
+
end
|
96
141
|
|
97
|
-
# Splits
|
98
|
-
def
|
99
|
-
self.page = split_into_parts(
|
142
|
+
# Splits fetched HTML and sends the first fragment
|
143
|
+
def split_and_send(html_doc)
|
144
|
+
self.page = split_into_parts(html_doc)
|
100
145
|
send_fragment(0)
|
101
146
|
end
|
147
|
+
|
148
|
+
def page_fetcher(url)
|
149
|
+
TelegramWebProxy::PageFetcher.new(url)
|
150
|
+
end
|
102
151
|
end
|
@@ -1,50 +1,14 @@
|
|
1
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
|
4
|
+
TelegramWebProxy::PageFetcher = Struct.new(:url) do
|
4
5
|
HTTP_USER_AGENT = 'ELinks/0.13.GIT (textmode; Linux 2.6.29 i686; 119x51-2)'
|
5
6
|
|
6
7
|
def get
|
7
|
-
|
8
|
-
fix_relative_urls(sanitize_page(result.body), url)
|
8
|
+
RestClient.get(url, user_agent: HTTP_USER_AGENT)
|
9
9
|
end
|
10
10
|
|
11
11
|
def post(data)
|
12
|
-
|
13
|
-
fix_relative_urls(sanitize_page(result.body), url)
|
12
|
+
RestClient.post(url, data, user_agent: HTTP_USER_AGENT)
|
14
13
|
end
|
15
|
-
|
16
|
-
private
|
17
|
-
# Takes out most of HTML tags and unnecesary whitespaces
|
18
|
-
def sanitize_page(body)
|
19
|
-
Sanitize.fragment(body,
|
20
|
-
# these tags are allowed for Teregram, however,
|
21
|
-
# nested tags are not allowed.
|
22
|
-
# Would be great to resolve this issue later.
|
23
|
-
# elements: ['b', 'strong', 'i', 'em', 'a', 'code', 'pre'],
|
24
|
-
elements: ['a'],
|
25
|
-
attributes: {'a' => ['href']},
|
26
|
-
remove_contents: [:script, :style, :img],
|
27
|
-
whitespace_elements: {
|
28
|
-
'br' => {before: "\n", after: ""},
|
29
|
-
'div' => {before: "\n", after: "\n"},
|
30
|
-
'p' => {before: "\n", after: "\n"}
|
31
|
-
}).
|
32
|
-
gsub(/\t+/, " ").
|
33
|
-
gsub(/ +/, " ").
|
34
|
-
gsub(/\n+/, "\n").
|
35
|
-
gsub(/( \n)+/, " \n").
|
36
|
-
gsub("\n \n", "\n")
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
# Replaces relative urls to absolute.
|
41
|
-
#
|
42
|
-
# @param text [String] original text
|
43
|
-
# @param url [String] url from where the page was fetched
|
44
|
-
# @return [String]
|
45
|
-
def fix_relative_urls(text, url)
|
46
|
-
uri = URI.parse(url)
|
47
|
-
text.gsub('ref="/', "ref=\"#{uri.scheme || 'http'}://#{uri.host || uri.path}/")
|
48
|
-
end
|
49
|
-
|
50
14
|
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
TelegramWebProxy::PageSanitizer = Struct.new(:response) do
|
5
|
+
WHITESPACE_ELEMENTS = {'br' => {before: "\n", after: ''},
|
6
|
+
'div' => {before: "\n", after: "\n"},
|
7
|
+
'p' => {before: "\n", after: "\n"}}.freeze
|
8
|
+
|
9
|
+
def sanitize
|
10
|
+
fix_relative_urls(prepare_page(response.body), response.request.url)
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
# Takes out most of the HTML tags and unnecessary whitespace
|
16
|
+
def prepare_page(body)
|
17
|
+
remove_whitespaces(transform_img(remove_nested_tags(sanitize_tags(body))))
|
18
|
+
end
|
19
|
+
|
20
|
+
def image_description(attrs)
|
21
|
+
CGI.escapeHTML((attrs['alt'] || attrs['title'] || 'img').to_s[0..20])
|
22
|
+
end
|
23
|
+
|
24
|
+
def image_replacement(attrs)
|
25
|
+
return '' if attrs['width'].to_s == '1' && attrs['height'].to_s == '1'
|
26
|
+
"<a href=\"#{CGI.escapeHTML(attrs['src'])}\">〖#{image_description(attrs)}〗</a>"
|
27
|
+
end
|
28
|
+
|
29
|
+
# Replaces <img src=... alt=...> with <a href=img_src>img_alt</a>
|
30
|
+
def transform_img(text)
|
31
|
+
replacements = text.scan(/<\\?img [^>]*>/).map do |img_tag|
|
32
|
+
attrs = Nokogiri::HTML.fragment(img_tag).children[0].attributes
|
33
|
+
[img_tag, image_replacement(attrs)]
|
34
|
+
end
|
35
|
+
replacements.inject(text) { |str, tags| str.gsub(tags[0], tags[1]) }
|
36
|
+
end
|
37
|
+
|
38
|
+
def remove_nested_tags(text)
|
39
|
+
tree = Nokogiri::HTML.fragment(text)
|
40
|
+
[process_tree(tree)].flatten.map(&:to_s).join
|
41
|
+
end
|
42
|
+
|
43
|
+
def no_nested_tags?(tree, tag_name)
|
44
|
+
tree.children.none? { |c| c.enum_for(:traverse).map(&:name).include?(tag_name) }
|
45
|
+
end
|
46
|
+
|
47
|
+
# Makes the tree flat.
|
48
|
+
# It takes the top-level tag and removes all nested tags unless it has a nested <a> tag
|
49
|
+
# (otherwise it takes the <a> tag and removes all wrapping tags)
|
50
|
+
def process_tree(tree)
|
51
|
+
return tree if tree.instance_of? Nokogiri::XML::Text
|
52
|
+
if (tree.name == 'a' || no_nested_tags?(tree, 'a')) &&
|
53
|
+
!tree.instance_of?(Nokogiri::HTML::DocumentFragment)
|
54
|
+
text = tree.xpath('.//text()').to_s
|
55
|
+
tree.children.map(&:remove)
|
56
|
+
tree.add_child text
|
57
|
+
tree
|
58
|
+
else
|
59
|
+
tree.children.map { |child| process_tree(child) }
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def sanitize_tags(body)
|
64
|
+
Sanitize.fragment(
|
65
|
+
body,
|
66
|
+
elements: %w[b strong i em a code pre img],
|
67
|
+
attributes: {'a' => ['href'], 'img' => %w[src alt title width height]},
|
68
|
+
remove_contents: %i[script style img],
|
69
|
+
whitespace_elements: WHITESPACE_ELEMENTS
|
70
|
+
)
|
71
|
+
end
|
72
|
+
|
73
|
+
def remove_whitespaces(text)
|
74
|
+
# FIXME: remove also linebreak from "\n</tag>\n" and "\n<tag>\n"
|
75
|
+
text.
|
76
|
+
gsub(/\t+/, ' ').
|
77
|
+
gsub(/ +/, ' ').
|
78
|
+
gsub(/( \n)+/, "\n").
|
79
|
+
gsub(/\n+/, "\n").
|
80
|
+
strip
|
81
|
+
end
|
82
|
+
|
83
|
+
# Replaces relative URLs with absolute ones.
|
84
|
+
#
|
85
|
+
# @param text [String] original text
|
86
|
+
# @param url [String] url from where the page was fetched
|
87
|
+
# @return [String]
|
88
|
+
def fix_relative_urls(text, url)
|
89
|
+
uri = URI.parse(url)
|
90
|
+
text.gsub('ref="//', "ref=\"#{uri.scheme || 'http'}://").
|
91
|
+
gsub('ref="/', "ref=\"#{uri.scheme || 'http'}://#{uri.host || uri.path}/")
|
92
|
+
end
|
93
|
+
end
|
data/telegram_web_proxy.gemspec
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
lib = File.expand_path('../lib', __FILE__)
|
3
5
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
6
|
require 'telegram_web_proxy/version'
|
5
7
|
|
6
8
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
9
|
+
spec.name = 'telegram_web_proxy'
|
8
10
|
spec.version = TelegramWebProxy::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
11
|
+
spec.authors = ['Vladimir Maslov']
|
12
|
+
spec.email = ['x86d0cent@gmail.com']
|
11
13
|
|
12
|
-
spec.summary =
|
13
|
-
spec.description =
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
14
|
+
spec.summary = 'Web proxy for Telegram'
|
15
|
+
spec.description = 'A simple web proxy bot for Telegram'
|
16
|
+
spec.homepage = 'https://gitlab.com/x86d0cent/telegram_web_proxy'
|
17
|
+
spec.license = 'MIT'
|
16
18
|
|
17
|
-
# Prevent pushing this gem to RubyGems.org.
|
19
|
+
# Prevent pushing this gem to RubyGems.org.
|
20
|
+
# To allow pushes either set the 'allowed_push_host'
|
18
21
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
19
22
|
# if spec.respond_to?(:metadata)
|
20
23
|
# spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
|
@@ -26,16 +29,18 @@ Gem::Specification.new do |spec|
|
|
26
29
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
27
30
|
f.match(%r{^(test|spec|features)/})
|
28
31
|
end
|
29
|
-
spec.bindir =
|
32
|
+
spec.bindir = 'exe'
|
30
33
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
31
|
-
spec.require_paths = [
|
34
|
+
spec.require_paths = ['lib']
|
35
|
+
spec.required_ruby_version = '>= 2.4.0'
|
32
36
|
|
33
|
-
spec.add_development_dependency
|
34
|
-
spec.add_development_dependency
|
35
|
-
spec.add_development_dependency
|
37
|
+
spec.add_development_dependency 'bundler', '~> 1.14'
|
38
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
39
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
40
|
+
spec.add_development_dependency 'rubocop'
|
41
|
+
spec.add_development_dependency 'rubocop-rspec'
|
36
42
|
|
37
43
|
spec.add_dependency 'rest-client', '~> 2.0'
|
38
44
|
spec.add_dependency 'sanitize', '~> 4.5'
|
39
45
|
spec.add_dependency 'telegram-bot-ruby', '~> 0.8.0'
|
40
|
-
|
41
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: telegram_web_proxy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vladimir Maslov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,34 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rubocop-rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: rest-client
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -102,7 +130,9 @@ extensions: []
|
|
102
130
|
extra_rdoc_files: []
|
103
131
|
files:
|
104
132
|
- ".gitignore"
|
133
|
+
- ".gitlab-ci.yml"
|
105
134
|
- ".rspec"
|
135
|
+
- ".rubocop.yml"
|
106
136
|
- ".travis.yml"
|
107
137
|
- CODE_OF_CONDUCT.md
|
108
138
|
- Gemfile
|
@@ -115,6 +145,7 @@ files:
|
|
115
145
|
- lib/telegram_web_proxy/keyboard_generator.rb
|
116
146
|
- lib/telegram_web_proxy/message_processor.rb
|
117
147
|
- lib/telegram_web_proxy/page_fetcher.rb
|
148
|
+
- lib/telegram_web_proxy/page_sanitizer.rb
|
118
149
|
- lib/telegram_web_proxy/version.rb
|
119
150
|
- telegram_web_proxy.gemspec
|
120
151
|
homepage: https://gitlab.com/x86d0cent/telegram_web_proxy
|
@@ -129,7 +160,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
160
|
requirements:
|
130
161
|
- - ">="
|
131
162
|
- !ruby/object:Gem::Version
|
132
|
-
version:
|
163
|
+
version: 2.4.0
|
133
164
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
134
165
|
requirements:
|
135
166
|
- - ">="
|