feed_into 0.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.circleci/config.yml +8 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +13 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +82 -0
- data/LICENSE.txt +21 -0
- data/README.md +944 -0
- data/Rakefile +8 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/feed_into.gemspec +45 -0
- data/lib/feed_into/version.rb +5 -0
- data/lib/feed_into.rb +1433 -0
- data/lib/modules/general.rb +250 -0
- metadata +176 -0
@@ -0,0 +1,250 @@
|
|
1
|
+
# General purpose helpers: page download, RSS/Atom mining, S3 url building,
# html stripping and title templating. Everything is reached through the
# public dispatcher #crl_general; the remaining methods are private.
module General
  # Dispatches +sym+ to the matching private helper.
  #
  # @param sym [Symbol] one of :download, :pre_titles, :mining_rss_one,
  #   :mining_rss_two, :format_url_s3, :format_html_remove
  # @param cmd [Hash, nil] read for +:url+ (and +:name+ by the title helper)
  # @param channel [Hash, nil] read for +:options+, +:name+ and +:sym+
  # @param response [String, nil] raw feed body handed to the mining helpers
  # @param data [Hash, nil] feed structure (+:meta+, +:items+) for :pre_titles
  # @param obj [Hash, nil] configuration; read for +:format+ and +:options+
  # @param custom [Hash] extra arguments: +:query+ for :format_url_s3,
  #   +:html+ for :format_html_remove
  # @return [Array] two elements: the helper's result (nil when +sym+ is
  #   unknown) and an Array of message strings
  def crl_general( sym, cmd, channel, response, data, obj, custom={} )
    result = nil
    messages = []

    case sym
    when :download
      result, messages = crl_general_download( cmd[:url], obj )
    when :pre_titles
      result, messages = crl_general_pre_titles( cmd, channel, data, obj )
    when :mining_rss_one
      result = crl_general_mining_rss_one( cmd[:url], response, obj )
    when :mining_rss_two
      result = crl_general_mining_rss_two( cmd[:url], response, obj )
    when :format_url_s3
      result = crl_general_format_url_s3( obj, channel[:options][:html], custom[:query] )
    when :format_html_remove
      result = crl_general_format_html_remove( custom[:html] )
    else
      messages.push( "General: #{sym} not found." )
    end

    return result, messages
  end


  private


  # @return [Array] always an empty Array
  def crl_general_channels()
    []
  end


  # Requests +url+ with a fixed set of browser-like request headers.
  #
  # The '{{version}}' marker inside obj[:format][:download][:agent] is
  # replaced by a random number between 89.53 and 91.54 (two decimals).
  #
  # @param url [String] address to fetch
  # @param obj [Hash] read for obj[:format][:download][:agent]
  # @return [Array] the response body and a one-element Array holding
  #   "Download: Status <code>"
  def crl_general_download( url, obj )
    version = ( rand( 89.0..91.0 ) + ( rand( 530.0..540.0 ) / 1000 ) ).round( 2 )
    agent = obj[:format][:download][:agent].gsub( '{{version}}', version.to_s )

    header = {
      'User-Agent' => agent,
      'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language' => 'en-US,en;q=0.5',
      'Connection' => 'keep-alive',
      'Upgrade-Insecure-Requests' => '1',
      'Sec-Fetch-Dest' => 'document',
      'Sec-Fetch-Mode' => 'navigate',
      'Sec-Fetch-Site' => 'none',
      'Sec-Fetch-User' => '?1',
      'Pragma' => 'no-cache',
      'Cache-Control' => 'no-cache'
    }

    response = Net::HTTP.get_response( URI( url ), header )
    return response.body, [ "Download: Status #{response.code}" ]
  end


  # Mines a feed built from <item> nodes carrying <pubDate> and a text
  # <link> (presumably RSS 2.0 - confirm against the feeds in use).
  #
  # @param url [String] stored unchanged in feed[:meta][:url]
  # @param response [String] XML document
  # @param obj [Hash, nil] unused
  # @return [Hash] { meta: { title:, url: }, items: [ ... ] }
  def crl_general_mining_rss_one( url, response, obj )
    crl_general_mining_feed( url, response, 'item', 'pubDate' ) do | entry |
      entry.at( 'link' ).text
    end
  end


  # Mines a feed built from <entry> nodes carrying <updated> and a
  # <link href="..."> (presumably Atom - confirm against the feeds in use).
  #
  # @param url [String] stored unchanged in feed[:meta][:url]
  # @param response [String] XML document
  # @param obj [Hash, nil] unused
  # @return [Hash] { meta: { title:, url: }, items: [ ... ] }
  def crl_general_mining_rss_two( url, response, obj )
    crl_general_mining_feed( url, response, 'entry', 'updated' ) do | entry |
      entry.at( 'link' ).attribute( 'href' ).value
    end
  end


  # Shared body of both mining helpers.
  #
  # @param url [String] stored in feed[:meta][:url]
  # @param response [String] XML document
  # @param entry_selector [String] CSS selector matching one feed entry
  # @param time_selector [String] selector of the entry's timestamp node
  # @yieldparam entry [Nokogiri::XML::Node] the current entry
  # @yieldreturn [String] the entry's url
  # @return [Hash] { meta: { title:, url: }, items: [ ... ] }
  def crl_general_mining_feed( url, response, entry_selector, time_selector )
    doc = Nokogiri::XML( response )

    feed = {
      meta: {
        # Double quotes in the feed title are replaced by single quotes.
        title: doc.at( 'title' ).text.gsub( '"', "'" ),
        url: url
      },
      items: []
    }

    doc.css( entry_selector ).each do | entry |
      title = crl_general_format_html_remove( entry.at( 'title' ).text )
      # Pass the node's text to Time.parse explicitly rather than relying on
      # the node being implicitly convertible to a String.
      stamp_text = entry.at( time_selector ).text

      feed[:items].push(
        {
          title: title,
          time: {
            stamp: Time.parse( stamp_text ).to_i,
            utc: stamp_text
          },
          title_viewer: title,
          url: yield( entry )
        }
      )
    end

    feed
  end


  # Builds "https://<bucket>.s3.<region>.amazonaws.com/<sub folder><folder><file>?<query>".
  #
  # Missing (nil) parts contribute an empty string and a nil +query+ is
  # treated as an empty query, so the dispatcher may be called without
  # custom[:query]. The '?' is always appended.
  #
  # @param obj [Hash] read for obj[:options][:s3] (:bucket_name, :region,
  #   :bucket_sub_folder, :bucket_folder)
  # @param file [String, nil] file name appended after the folders
  # @param query [Hash, Array, nil] passed to URI.encode_www_form
  # @return [String]
  def crl_general_format_url_s3( obj, file, query )
    s3 = obj[:options][:s3]

    [
      'https://',
      s3[:bucket_name],
      '.s3.',
      s3[:region],
      '.amazonaws.com/',
      s3[:bucket_sub_folder],
      s3[:bucket_folder],
      file,
      '?',
      URI.encode_www_form( query || {} )
    ].join
  end


  # Reduces an html fragment to plain text and capitalizes every word.
  #
  # Entities are unescaped first, the outcome is parsed as HTML and the
  # content of all text nodes is concatenated. String#capitalize also
  # downcases the remainder of each word ("NASA" becomes "Nasa").
  #
  # @param html [#to_s] fragment, possibly entity-escaped
  # @return [String] words separated by single spaces
  def crl_general_format_html_remove( html )
    plain = ''
    Nokogiri::HTML( CGI.unescapeHTML( html.to_s ) ).traverse do | node |
      plain << node.text if node.text?
    end

    plain
      .strip
      .split( ' ' )
      .map( &:capitalize )
      .join( ' ' )
  end


  # Replaces the title of every item in data[:items] (in place) with the
  # templated title produced by #crl_general_pre_title.
  #
  # @return [Array] +data+ and all messages collected while templating
  def crl_general_pre_titles( cmd, channel, data, obj )
    messages = []

    data[:items].each_with_index do | item, index |
      title, errors = crl_general_pre_title( cmd, channel, data, index, obj )
      messages.concat( errors )
      item[:title] = title
    end

    return data, messages
  end


  # Renders the template obj[:format][:title][:str] for one item.
  #
  # Placeholders look like {{name}} or {{name__format_format}}. Known names:
  # cmd_name, channel_name, sym, separator, title_channel, title_item,
  # title_meta. Known formats: upcase, titleize.
  #
  # An unknown name is reported in the messages and its placeholder is left
  # in the title untouched; an unknown format is reported and skipped.
  # Values are inserted literally - backslash sequences inside a title are
  # not interpreted as back-references.
  #
  # @param d_index [Integer] index of the item inside data[:items]
  # @return [Array] the rendered (and possibly truncated) title and an Array
  #   of message strings
  def crl_general_pre_title( cmd, channel, data, d_index, obj )
    messages = []
    title_format = obj[:format][:title]
    str = title_format[:str]

    str.scan( /\{\{[a-z,_:]+\}\}/ ).uniq.each do | placeholder |
      key = placeholder.delete( '{:}' )
      name, *rest = key.split( '__' )
      text = name.to_s.to_sym
      formats = key.include?( '__' ) ? rest.last.to_s.split( '_' ).map( &:to_sym ) : []

      insert = crl_general_pre_title_insert( text, cmd, channel, data, d_index, title_format )
      if insert.nil?
        messages.push( "Set Title (insert): #{text} not found." )
        next
      end

      formats.each do | f |
        case f
        when :upcase
          insert = insert.upcase
        when :titleize
          insert = insert.split( ' ' ).map( &:capitalize ).join( ' ' )
        else
          messages.push( "Set Title (format): #{f} not found." )
        end
      end

      # Block form: the replacement is taken literally.
      str = str.gsub( placeholder ) { insert }
    end

    limit = title_format[:length]
    str = str[ 0, limit ] + title_format[:more] if str.length > limit

    return str, messages
  end


  # Resolves the value behind one placeholder name.
  #
  # @return [String, nil] the value as a String (empty when the configured
  #   value is nil), or nil when +name+ is not a known placeholder
  def crl_general_pre_title_insert( name, cmd, channel, data, d_index, title_format )
    value =
      case name
      when :cmd_name then cmd[:name]
      when :channel_name then channel[:name].to_s.gsub( '_', ' ' )
      when :sym then title_format[:symbol][ channel[:sym] ]
      when :separator then title_format[:separator]
      when :title_channel then channel[:name]
      when :title_item then data[:items][ d_index ][:title]
      when :title_meta then data[:meta][:title]
      else return nil
      end

    value.to_s
  end
end
|
metadata
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: feed_into
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.9
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- a6b8
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-04-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.14.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.14.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: time
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.1.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.1.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: tzinfo
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.0.4
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.0.4
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: cgi
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.2.0
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.2.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: json
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 2.5.1
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 2.5.1
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: uri
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.12.1
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.12.1
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: net-http
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.1.1
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.1.1
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: activesupport
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '6.1'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '6.1'
|
125
|
+
description: Merge multiple different data streams into a custom structure. Also easy
|
126
|
+
to expand by a custom module system.
|
127
|
+
email:
|
128
|
+
- hello@13plus4.com
|
129
|
+
executables: []
|
130
|
+
extensions: []
|
131
|
+
extra_rdoc_files: []
|
132
|
+
files:
|
133
|
+
- ".circleci/config.yml"
|
134
|
+
- ".gitignore"
|
135
|
+
- ".rubocop.yml"
|
136
|
+
- CHANGELOG.md
|
137
|
+
- CODE_OF_CONDUCT.md
|
138
|
+
- Gemfile
|
139
|
+
- Gemfile.lock
|
140
|
+
- LICENSE.txt
|
141
|
+
- README.md
|
142
|
+
- Rakefile
|
143
|
+
- bin/console
|
144
|
+
- bin/setup
|
145
|
+
- feed_into.gemspec
|
146
|
+
- lib/feed_into.rb
|
147
|
+
- lib/feed_into/version.rb
|
148
|
+
- lib/modules/general.rb
|
149
|
+
homepage: https://github.com/a6b8/feed-into-for-ruby
|
150
|
+
licenses:
|
151
|
+
- MIT
|
152
|
+
metadata:
|
153
|
+
allowed_push_host: https://rubygems.org
|
154
|
+
homepage_uri: https://github.com/a6b8/feed-into-for-ruby
|
155
|
+
source_code_uri: https://github.com/a6b8/feed-into-for-ruby
|
156
|
+
changelog_uri: https://raw.githubusercontent.com/a6b8/feed-into-for-ruby/main/CHANGELOG.md
|
157
|
+
post_install_message:
|
158
|
+
rdoc_options: []
|
159
|
+
require_paths:
|
160
|
+
- lib
|
161
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ">="
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: 2.4.0
|
166
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
|
+
requirements:
|
168
|
+
- - ">="
|
169
|
+
- !ruby/object:Gem::Version
|
170
|
+
version: '0'
|
171
|
+
requirements: []
|
172
|
+
rubygems_version: 3.2.3
|
173
|
+
signing_key:
|
174
|
+
specification_version: 4
|
175
|
+
summary: Merge multiple different data streams into a custom structure.
|
176
|
+
test_files: []
|