feed_into 0.2.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,250 @@
1
# General-purpose crawler helpers: page download, RSS/Atom mining, S3 url
# building, HTML stripping and title templating.
#
# NOTE(review): the methods below call Nokogiri, Net::HTTP, URI, CGI and
# Time.parse; this module presumably relies on the including file to require
# them - confirm against the top-level requires.
module General
  # Dispatches +sym+ to the matching +crl_general_*+ helper.
  #
  # @param sym [Symbol] one of :download, :pre_titles, :mining_rss_one,
  #   :mining_rss_two, :format_url_s3, :format_html_remove
  # @param cmd [Hash] read for +:url+ (download / mining) and passed on to the
  #   title helpers
  # @param channel [Hash] read for +[:options][:html]+ (:format_url_s3) and
  #   passed on to the title helpers
  # @param response [String] raw feed document for the mining helpers
  # @param data [Hash] feed structure (+:meta+, +:items+) for :pre_titles
  # @param obj [Hash] configuration hash handed to the helpers
  # @param custom [Hash] extra arguments: +:query+ for :format_url_s3,
  #   +:html+ for :format_html_remove
  # @return [Array] +[result, messages]+; +result+ is +nil+ and +messages+
  #   holds a "not found" note when +sym+ is unknown
  def crl_general(sym, cmd, channel, response, data, obj, custom = {})
    messages = []
    result = nil

    case sym
    when :download
      result, messages = crl_general_download(cmd[:url], obj)
    when :pre_titles
      result, messages = crl_general_pre_titles(cmd, channel, data, obj)
    when :mining_rss_one
      result = crl_general_mining_rss_one(cmd[:url], response, obj)
    when :mining_rss_two
      result = crl_general_mining_rss_two(cmd[:url], response, obj)
    when :format_url_s3
      result = crl_general_format_url_s3(obj, channel[:options][:html], custom[:query])
    when :format_html_remove
      result = crl_general_format_html_remove(custom[:html])
    else
      messages.push("General: #{sym} not found.")
    end

    [result, messages]
  end

  private

  # @return [Array] always an empty list
  def crl_general_channels
    []
  end

  # Fetches +url+ with browser-like request headers.
  #
  # @param url [String] address to download
  # @param obj [Hash] read for +[:format][:download][:agent]+, a user-agent
  #   template whose +{{version}}+ marker is replaced by a random version number
  # @return [Array] +[body, messages]+ where +messages+ reports the status code
  def crl_general_download(url, obj)
    # Randomised version so consecutive requests do not share one user agent.
    version = (rand(89.0..91.0) + (rand(530.0..540.0) / 1000)).round(2)
    agent = obj[:format][:download][:agent].gsub('{{version}}', version.to_s)

    headers = {
      'User-Agent' => agent,
      'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language' => 'en-US,en;q=0.5',
      'Connection' => 'keep-alive',
      'Upgrade-Insecure-Requests' => '1',
      'Sec-Fetch-Dest' => 'document',
      'Sec-Fetch-Mode' => 'navigate',
      'Sec-Fetch-Site' => 'none',
      'Sec-Fetch-User' => '?1',
      'Pragma' => 'no-cache',
      'Cache-Control' => 'no-cache'
    }

    response = Net::HTTP.get_response(URI(url), headers)
    [response.body, ["Download: Status #{response.code}"]]
  end

  # Mines a feed made of +item+ elements carrying +pubDate+ and a text +link+.
  #
  # @param url [String] feed address, stored in the result meta data
  # @param response [String] raw XML document
  # @param obj [Hash] unused; kept so every mining helper shares one signature
  # @return [Hash] +{ meta: { title:, url: }, items: [...] }+
  def crl_general_mining_rss_one(url, response, obj)
    crl_general_mining_feed(url, response, 'item', 'pubDate') do |entry|
      link = entry.at('link')
      link && link.text
    end
  end

  # Mines a feed made of +entry+ elements carrying +updated+ and a +link+
  # element whose +href+ attribute holds the address.
  #
  # @param url [String] feed address, stored in the result meta data
  # @param response [String] raw XML document
  # @param obj [Hash] unused; kept so every mining helper shares one signature
  # @return [Hash] +{ meta: { title:, url: }, items: [...] }+
  def crl_general_mining_rss_two(url, response, obj)
    crl_general_mining_feed(url, response, 'entry', 'updated') do |entry|
      link = entry.at('link')
      link && link['href']
    end
  end

  # Shared mining routine behind both feed flavours. Missing nodes yield +nil+
  # values instead of raising; an unparsable date still raises from Time.parse.
  #
  # @param url [String] feed address
  # @param response [String] raw XML document
  # @param entry_selector [String] selector matching one feed entry
  # @param time_selector [String] selector of the timestamp inside an entry
  # @yieldparam entry [Object] the entry node; the block returns its url
  # @return [Hash] +{ meta: { title:, url: }, items: [...] }+
  def crl_general_mining_feed(url, response, entry_selector, time_selector)
    doc = Nokogiri::XML(response)
    title_node = doc.at('title')
    meta_title = title_node && title_node.text.tr('"', "'")

    items = doc.css(entry_selector).map do |entry|
      entry_title = entry.at('title')
      title = crl_general_format_html_remove(entry_title && entry_title.text)

      time_node = entry.at(time_selector)
      time_text = time_node && time_node.text
      stamp = time_text.to_s.strip.empty? ? nil : Time.parse(time_text).to_i

      {
        title: title,
        time: { stamp: stamp, utc: time_text },
        title_viewer: title,
        url: yield(entry)
      }
    end

    { meta: { title: meta_title, url: url }, items: items }
  end

  # Builds the public S3 address of +file+ followed by a query string.
  #
  # @param obj [Hash] read for +[:options][:s3]+ with +:bucket_name+,
  #   +:region+, +:bucket_sub_folder+ and +:bucket_folder+
  # @param file [String] file name appended after the folders
  # @param query [Hash, Array, nil] parameters for URI.encode_www_form;
  #   +nil+ is treated as "no parameters"
  # @return [String] the url; the +?+ separator is always present
  def crl_general_format_url_s3(obj, file, query)
    s3 = obj[:options][:s3]
    host = "#{s3[:bucket_name]}.s3.#{s3[:region]}.amazonaws.com"
    path = "#{s3[:bucket_sub_folder]}#{s3[:bucket_folder]}#{file}"

    "https://#{host}/#{path}?#{URI.encode_www_form(query || {})}"
  end

  # Unescapes HTML entities, keeps only the text nodes and capitalizes every
  # whitespace-separated word (the remainder of each word is lower-cased).
  #
  # @param html [#to_s] markup or plain text; +nil+ gives an empty string
  # @return [String] cleaned text
  def crl_general_format_html_remove(html)
    text = +''
    Nokogiri::HTML(CGI.unescapeHTML(html.to_s)).traverse do |node|
      text << node.text if node.text?
    end

    text.strip.split(' ').map(&:capitalize).join(' ')
  end

  # Rewrites the title of every item in +data+ through the title template.
  # The items are updated in place.
  #
  # @param cmd [Hash] see {#crl_general_pre_title}
  # @param channel [Hash] see {#crl_general_pre_title}
  # @param data [Hash] feed structure whose +[:items]+ are updated
  # @param obj [Hash] see {#crl_general_pre_title}
  # @return [Array] +[data, messages]+
  def crl_general_pre_titles(cmd, channel, data, obj)
    messages = []

    data[:items].each_with_index do |item, index|
      title, errors = crl_general_pre_title(cmd, channel, data, index, obj)
      messages.concat(errors)
      item[:title] = title
    end

    [data, messages]
  end

  # Renders the title template +obj[:format][:title][:str]+ for one item.
  #
  # A placeholder looks like +{{name}}+ or +{{name__format_format}}+. Known
  # names are cmd_name, channel_name, sym, separator, title_channel, title_item
  # and title_meta; known formats are upcase and titleize. Unknown names are
  # replaced by an empty string and unknown formats are skipped; both add a
  # note to the returned messages.
  #
  # @param cmd [Hash] read for +:name+
  # @param channel [Hash] read for +:name+ and +:sym+
  # @param data [Hash] read for +[:meta][:title]+ and +[:items][d_index][:title]+
  # @param d_index [Integer] index of the item inside +data[:items]+
  # @param obj [Hash] read for +[:format][:title]+ with +:str+, +:symbol+,
  #   +:separator+, +:length+ and +:more+
  # @return [Array] +[title, messages]+
  def crl_general_pre_title(cmd, channel, data, d_index, obj)
    messages = []
    title_format = obj[:format][:title]
    str = title_format[:str]

    str.scan(/\{\{[a-z,_:]+\}\}/).each do |placeholder|
      key, formats = crl_general_pre_title_parse(placeholder)

      value =
        case key
        when :cmd_name then cmd[:name]
        when :channel_name then channel[:name].to_s.tr('_', ' ')
        when :sym then title_format[:symbol][channel[:sym]]
        when :separator then title_format[:separator]
        when :title_channel then channel[:name]
        when :title_item then data[:items][d_index][:title]
        when :title_meta then data[:meta][:title]
        else
          messages.push("Set Title (insert): #{key} not found.")
          nil
        end

      # to_s keeps nil and Symbol values from raising in the steps below.
      insert = crl_general_pre_title_format(value.to_s, formats, messages)

      # Block form: the replacement is taken literally, so backslashes inside
      # a feed title are never read as back-references.
      str = str.gsub(placeholder) { insert }
    end

    limit = title_format[:length]
    str = str[0, limit] + title_format[:more] if str.length > limit

    [str, messages]
  end

  # Splits a +{{name__format_format}}+ placeholder into its name and formats.
  #
  # @param placeholder [String] the placeholder including its braces
  # @return [Array] +[key, formats]+ as a Symbol and an Array of Symbols
  def crl_general_pre_title_parse(placeholder)
    token = placeholder.delete('{}:')
    return [token.to_sym, []] unless token.include?('__')

    segments = token.split('__')
    [segments.first.to_s.to_sym, segments.last.to_s.split('_').map(&:to_sym)]
  end

  # Applies the requested formats to +text+ without mutating it.
  #
  # @param text [String] value to format
  # @param formats [Array<Symbol>] formats in application order
  # @param messages [Array<String>] receives a note for every unknown format
  # @return [String] formatted text
  def crl_general_pre_title_format(text, formats, messages)
    formats.reduce(text) do |current, format|
      case format
      when :upcase
        current.upcase
      when :titleize
        current.split(' ').map(&:capitalize).join(' ')
      else
        messages.push("Set Title (format): #{format} not found.")
        current
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,176 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: feed_into
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.9
5
+ platform: ruby
6
+ authors:
7
+ - a6b8
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-04-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.14.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.14.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: time
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.1.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.1.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: tzinfo
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 2.0.4
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 2.0.4
55
+ - !ruby/object:Gem::Dependency
56
+ name: cgi
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.2.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.2.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: json
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 2.5.1
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 2.5.1
83
+ - !ruby/object:Gem::Dependency
84
+ name: uri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.12.1
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.12.1
97
+ - !ruby/object:Gem::Dependency
98
+ name: net-http
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.1.1
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.1.1
111
+ - !ruby/object:Gem::Dependency
112
+ name: activesupport
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '6.1'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '6.1'
125
+ description: Merge multiple different data streams into a custom structure. Also easy
126
+ to expand by a custom module system.
127
+ email:
128
+ - hello@13plus4.com
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".circleci/config.yml"
134
+ - ".gitignore"
135
+ - ".rubocop.yml"
136
+ - CHANGELOG.md
137
+ - CODE_OF_CONDUCT.md
138
+ - Gemfile
139
+ - Gemfile.lock
140
+ - LICENSE.txt
141
+ - README.md
142
+ - Rakefile
143
+ - bin/console
144
+ - bin/setup
145
+ - feed_into.gemspec
146
+ - lib/feed_into.rb
147
+ - lib/feed_into/version.rb
148
+ - lib/modules/general.rb
149
+ homepage: https://github.com/a6b8/feed-into-for-ruby
150
+ licenses:
151
+ - MIT
152
+ metadata:
153
+ allowed_push_host: https://rubygems.org
154
+ homepage_uri: https://github.com/a6b8/feed-into-for-ruby
155
+ source_code_uri: https://github.com/a6b8/feed-into-for-ruby
156
+ changelog_uri: https://raw.githubusercontent.com/a6b8/feed-into-for-ruby/main/CHANGELOG.md
157
+ post_install_message:
158
+ rdoc_options: []
159
+ require_paths:
160
+ - lib
161
+ required_ruby_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: 2.4.0
166
+ required_rubygems_version: !ruby/object:Gem::Requirement
167
+ requirements:
168
+ - - ">="
169
+ - !ruby/object:Gem::Version
170
+ version: '0'
171
+ requirements: []
172
+ rubygems_version: 3.2.3
173
+ signing_key:
174
+ specification_version: 4
175
+ summary: Merge multiple different data streams into a custom structure.
176
+ test_files: []