flnews_post_proc 1.7

Sign up to get free protection for your applications and to get access to all the features.
data/lib/body.rb ADDED
@@ -0,0 +1,364 @@
1
+ #encoding: UTF-8
2
+
3
+ =begin
4
+ /***************************************************************************
5
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
6
+ * This program is free software; you can redistribute it and/or modify *
7
+ * it under the terms of the WTFPL 2.0 or later, see *
8
+ * http://www.wtfpl.net/about/ *
9
+ * *
10
+ * This program is distributed in the hope that it will be useful, *
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
13
+ * *
14
+ ***************************************************************************/
15
+ =end
16
+
17
+ require_relative 'basic_logging'
18
+ require_relative 'configuration'
19
+
20
# Default maximum line length used by String#wrap!.
WRAP_LENGTH = 65

# Endow the String class with a wrap function.
# This is not yet applicable to the message body, itself.
class String
  include BasicLogging

  public

  # Re-flows the receiver in place so that lines are broken once they would
  # grow beyond +length+ characters.
  #
  # * Lines are joined with CRLF; every word is followed by one space (the
  #   trailing space at the end of a line is kept, as before).
  # * Blank lines (a CRLF at the very beginning of a line) are preserved as
  #   paragraph breaks.
  # * Every line after the first one starts with +indent+ spaces.
  #
  # length:: maximum line length (defaults to WRAP_LENGTH)
  # indent:: number of spaces put in front of continuation lines
  #
  # Returns the receiver (self), whose content has been replaced.
  def wrap!(length = WRAP_LENGTH, indent = 0)
    # Protect paragraph breaks with a marker, because all other white space
    # is collapsed by the split below.
    marked = gsub(/^\r\n/, '[rnrn]')

    lines = ['']
    # String#split(' ') splits on any run of white space, CRLF included, and
    # never yields empty or padded words.
    marked.split(' ').each do |word|
      current = lines.last
      # Start a new line when the word does not fit, but only if the current
      # line already holds a word. Otherwise an over-long first word would
      # leave an empty line behind.
      if current.length + word.length > length && !current.strip.empty?
        # The line is *not* squeezed: words are separated by single spaces
        # anyway, and squeezing would reduce the indentation to one space.
        lines << ' ' * indent
      end
      lines.last << word << ' '
    end

    replace(lines.join("\r\n").gsub('[rnrn]', "\r\n\r\n"))
  end
end
55
+
56
# An object of this class represents the body of a news-article.
# It processes the original text and changes some details:
#   intro-lines
#   signatures
#   URI formats
#   Footnotes (and/or a list of references) are created
#
# The line separator is taken from the global $LN, the settings from the
# Configuration singleton; both are defined outside of this class.
class Body
  # a class-level configuration instance.
  @@config = Configuration.instance
  include BasicLogging

  # Reads the body text of the article, i.e. everything that follows the
  # first empty line of +article_text+. An article without any empty line
  # yields an empty body.
  def initialize(article_text)
    debug 'body intialize'
    # transform the article into an array.
    line_array = article_text.split($LN)
    # keep only what follows the first empty line ''
    start_index = line_array.index('')
    @lines = start_index ? line_array.drop(start_index + 1) : []
    debug("initialize(): body lines are #{@lines.inspect}")
  end

  # If so configured, replace an eventual followup-intro by the one
  # configured for a group. This may depend on other conditions and
  # must be triggered explicitly.
  #
  # intro:: the new intro-line; the placeholders %fup_name% and %fup_group%
  #         are substituted in a copy, the argument itself is not modified.
  #
  # The intro is only replaced when the first non-blank line after the
  # original intro-line is a citation (starts with '>').
  def set_intro(intro)
    return if !intro || intro.empty? || @@config.no_intro

    # The expressions which allow the identification of the previous poster
    # and of the newsgroup in the current article. Either may be unset.
    fn = @@config.FUP_NAME.to_s
    fg = @@config.FUP_GROUP.to_s
    debug("FUP_NAME is #{fn}")
    debug("FUP_GROUP is #{fg}")
    # Nothing can be identified without an expression for the name.
    return if fn.strip.empty?

    name_rx = Regexp.new(fn)
    group_rx = fg.strip.empty? ? nil : Regexp.new(fg)
    # name of the previous poster
    fup_name = nil
    # the current newsgroup
    fup_group = nil

    @lines.each_with_index do |line, i|
      # Once a name has been found, no further line is examined.
      next if fup_name || line.strip.empty?

      # match a name: the first capture group, if there is exactly one,
      # else the whole match.
      fup_name = line.match(name_rx) { |md| md.length == 2 ? md[1] : md[0] }
      debug("\tfup_name: #{fup_name}")

      if group_rx && !fup_group
        # match a group
        fup_group = line.match(group_rx) do |md|
          debug("\tmatch: #{md}")
          md.length == 2 ? md[1] : nil
        end
      end
      debug "group is #{fup_group}"

      # All that follows depends on the presence of a name
      # in the intro-string.
      next unless fup_name && !fup_name.strip.empty?

      # The first non-blank line after the intro; nil when the article ends
      # with the intro-line.
      following = @lines.drop(i + 1).find { |l| !l.strip.empty? }

      # check if there is a quote, at all
      if following && following.start_with?('>')
        debug("\tfound intro #{line}")
        # variables are part of the intro. Do substitutions.
        new_intro = intro.sub('%fup_name%', fup_name)
        new_intro = new_intro.sub('%fup_group%', fup_group) if fup_group
        debug("\tsetting intro #{new_intro}")

        # exchange original intro-line against the new one
        @lines[i] = new_intro.strip
        # keep this line for reference.
        @intro = new_intro
      else
        wmsg = 'Text following the intro is not a citation!'
        wmsg << "\n\tIntro will NOT be modified: \"#{line}\"!"
        warn wmsg
      end
    end
  end

  # Replaces the signature of the article by +signature+. Everything from
  # the first signature delimiter ('-- ') onwards is removed, then the
  # delimiter and the new signature are appended. Does nothing, if
  # +signature+ is nil or empty.
  def set_signature(signature)
    return unless signature && !signature.empty?

    # remove any signature(s) from the current article
    sigpos = @lines.index('-- ')
    if sigpos
      debug("found signature in line #{sigpos}")
      @lines = @lines.slice(0, sigpos)
    end
    debug("setting signature #{signature}")
    @lines << '-- ' << signature
  end

  # Returns the body as one String, lines joined by and terminated with $LN.
  # Three consecutive dots are replaced by an ellipsis character.
  def join
    new_body = @lines.join($LN) << $LN
    # replace ellipsis
    new_body.force_encoding('utf-8').gsub!('...', '…')
    new_body
  end

  # ------->
  # TODO : Concentrate all URL/URI munging functionality in 1 or 2 helper
  # classes, e.g. Body::News and Body::Http
  # <------

  # Verify and possibly correct links in the post.
  # Simple.
  def handle_uris
    # Determine here or elsewhere if URLs shall be verified.
    # Default is no.
    debug "verify URLs ? #{@@config.VFY_URLS}"
    return unless @@config.VFY_URLS

    debug 'verifying URLs'
    @lines.each_with_index do |l, i|
      # leave cited lines as they are.
      next if l.start_with?('>')

      # Currently unused: handle_news(l), for lines other than the intro.
      # IMPORTANT --------------------------------->
      # IT IS HENCEFORTH PROHIBITED TO WRITE AN EMAIL-ADDRESS
      # IN THE BODY OF A NEWS-POST AND TO NOT PREPEND IT WITH
      # mailto:
      # ... Because I do not know what to do in these cases,
      # and I am the boss!
      # <----------------------------
      # BUGFIX : Urls with @
      # BUGFIX : email address in the intro

      # http(s)
      @lines[i] = handle_http(l) if l.include?('http')
    end
  end

  # Extracts URLs or other text enclosed in the configured references
  # delimiter (and its reversed form) and replaces each occurrence by a
  # numbered marker. The extracted texts are appended to the body as a
  # numbered list, below the configured separator line.
  def handle_references
    # a symbol or string to mark the beginning and ending of a future
    # footnote. Worked on as a stripped copy; the configuration value is
    # not modified.
    ref_delim = @@config.REFERENCES_DELIMITER.to_s.strip
    debug("references delimiter is #{ref_delim}")
    body = @lines.join($LN)

    unless ref_delim.empty?
      references =
        if ref_delim == ref_delim.reverse
          msg = 'The References Delimiter is the same in its reversed form.'
          msg << "#{$LN}Cannot handle references or footnotes!"
          error(msg)
          []
        else
          extract_references(body, ref_delim)
        end
      append_footnotes(body, references, ref_delim)
    end

    @lines = body.split($LN)
    @encoding = body.encoding
  end

  private

  # Replaces every delimited reference in +body+ (in place) by a numbered
  # marker and returns the list of reference texts, delimiters included.
  def extract_references(body, ref_delim)
    references = []
    # The delimiter is a literal string, not a pattern: escape it.
    ref_rx = Regexp.new(Regexp.escape(ref_delim) + '.*?' + Regexp.escape(ref_delim.reverse),
                        Regexp::MULTILINE)
    debug("ref_rx is #{ref_rx}")

    # The pattern is applied to the whole body, over multiple lines,
    # if need be. The search resumes behind the marker which has just been
    # inserted, so that a marker can never be taken for a reference.
    pos = 0
    while (ref = body.match(ref_rx, pos))
      debug("found reference #{ref} (length: #{ref.to_s.size})")
      # ... This is some presentation thing.
      r = ref[0].gsub(/[ \t]+/, ' ').strip
      r.gsub!("\n", "\n ")
      references << r

      marker = format(@@config.REFERENCE_FORMAT, references.size.to_s)
      start = ref.begin(0)
      # identical references all receive the same number; the block form
      # inserts the marker literally.
      body.gsub!(ref[0]) { marker }
      pos = start + marker.length
    end
    debug("all references found:\n#{references.join("\n")}") unless references.empty?
    # re-wrap body
    # ATTN! Does not work!
    # body.wrap!(WRAP_LENGTH)
    references
  end

  # Appends the numbered list of +references+ to +body+ (in place).
  def append_footnotes(body, references, ref_delim)
    if references.empty?
      debug('no refences found')
      return
    end

    # a line, separating the footnotes from the body of the article
    body << $LN << @@config.REFERENCES_SEPARATOR.to_s << $LN
    references.each_with_index do |r, i|
      text = r.gsub(ref_delim, '').gsub(ref_delim.reverse, '').strip
      debug "reference #{text}"
      body << "#{i + 1}) " << text << $LN
    end
  end

  # handle <news:..@...>
  # Currently unused. Returns the altered line, or nil when the line does
  # not contain an '@' or contains a http(s)-URL.
  def handle_news(l)
    # ... “Do not trust nobody!”
    return unless l.include?('@') && !l.include?('http:') && !l.include?('https:')

    l.split.each do |ele|
      url = ele.match(/\<(.*)\>/)
      if url
        # angular brackets are there alright. Hurra.
        debug(" with angular brackets: #{url.inspect}")
        l = l.sub(url[1], "news:#{url[1]}") unless url[1].start_with?('mailto', 'news')
      else
        url = ele.match(/\S+@(\.|[^\p{P}]+)\S+/)
        next unless url

        debug(" withOUT angular brackets: #{url.inspect}")
        l.sub!(url[0], "news:#{url[0]}") unless url[0].start_with?('mailto', 'news')
        l.sub!('news:', '<news:')
        l.sub!(url[0], "#{url[0]}>")
      end
    end
    debug(" news: line is now #{l}")
    l
  end

  # Normalizes all http(s)-URLs in the line +l+: missing slashes after the
  # scheme are added and the URL is enclosed in angular brackets.
  # The line is modified in place and returned.
  def handle_http(l)
    debug('handle http')
    url_strs = l.split.map { |ele| trim_url_token(ele) }.compact
    debug("url_strs: #{url_strs}")

    # Tokens are handled in their order of appearance. Searching from the
    # end of the previous replacement ensures that a URL which occurs twice
    # (or is the prefix of an earlier one) is replaced where it stands.
    pos = 0
    url_strs.each do |str|
      begin
        nstr = bracketed_url(str)
        # tokens without a usable URL (e.g. 'http:--') stay untouched.
        next unless nstr

        at = l.index(str, pos)
        next unless at

        l[at, str.length] = nstr
        pos = at + nstr.length
      rescue StandardError => ex
        # this had looked intelligent, once.
        line = __LINE__
        msg = "Line #{line}: Cannot match the url " << str << ' against the Regexp (' << ex.message << ')'
        msg << "\nAborting. Bye"
        fatal msg
        STDERR.puts msg
        exit false
      end
    end

    l
  end

  # Returns the URL part of the token +ele+, trailing punctuation removed,
  # or nil if the token does not contain a http(s)-URL.
  def trim_url_token(ele)
    return unless ele.include?('http') && ele.match(/https?:\S+/)

    token = ele.lstrip
    # ensure last character is alphanumeric or '>' or '/'
    token.chop! until token.empty? || token.match(%r{([[:alnum:]]|>|/)$})
    token
  end

  # Returns +str+ with a complete scheme separator ('://') and enclosed in
  # angular brackets, or nil if no URL can be found in the result.
  def bracketed_url(str)
    nstr = str.dup
    debug('try to match http-link')
    # missing slashes ?
    match = str.match(/(https?):(\/*)/)
    if !match || match[2] != '//'
      debug 'cannot find (enough) slashes in the URL'
      if str.match(/https:/)
        nstr = str.sub(/https:\/*/, 'https://')
      elsif str.match(/http:/)
        nstr = str.sub(/http:\/*/, 'http://')
      end
      debug " with slashes: #{nstr}"
    end
    # missing angular brackets
    nstr.sub!(/http/, '<http') unless nstr.include?('<http')
    nstr << '>' unless nstr.end_with?('>')
    debug " matching nstr #{nstr}"
    match = nstr.match(/\<?(https?:\/\/[^\>]*)\>?/)
    return unless match

    debug "url is #{match[1]}"
    nstr
  end
end
355
# ------- TEST --------
# Manual smoke test for String#wrap!. It is only run when this file is
# executed directly, so that nothing is written to STDOUT when the file is
# loaded by another one.
if __FILE__ == $PROGRAM_NAME
  puts "eins zwei drei vier fünf sechs sieben acht neun zehn\n
eins zwei drei vier fünf sechs sieben acht neun zehn\n
\n
eins zwei drei vier fünf sechs sieben acht neun zehn\n
eins zwei drei vier fünf sechs sieben acht neun zehn".wrap!(20)
end
362
+
363
+ # Ω
364
+
@@ -0,0 +1,65 @@
1
+ #encoding: UTF-8
2
+ =begin
3
+ /***************************************************************************
4
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
5
+ * This program is free software; you can redistribute it and/or modify *
6
+ * it under the terms of the WTFPL 2.0 or later, see *
7
+ * http://www.wtfpl.net/about/ *
8
+ * *
9
+ * This program is distributed in the hope that it will be useful, *
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
12
+ * *
13
+ ***************************************************************************/
14
+ =end
15
+
16
# Functions to apply colors to terminal output.
# This is stolen from the Internet and I have lost track of my own additions
# and modifications.

# Color names and the digit which selects them in an ANSI escape-sequence
# (preceded by FG or BG).
COLORS = {
  default: 9, black: 0, red: 1, green: 2, yellow: 3,
  blue: 4, purple: 5, cyan: 6, white: 7
}.freeze

# Leading digits for background and foreground colors.
BG = 4
FG = 3
# Text attributes.
REGULAR = 0
BOLD = 1
UNDERLINE = 4
BLINK = 5
SWAP = 7
NEUTRAL = 0

# Style names and their attribute codes.
STYLES = {
  regular: REGULAR, bold: BOLD, underline: UNDERLINE,
  blink: BLINK, swap: SWAP, neutral: NEUTRAL
}.freeze

# Colorizes the given text. Color-code is either an escape-sequence or one of
# the symbols representing color-names in the COLORS hash.
# The returned String ends with the reset-sequence.
def colorize(text, color_code)
  if COLORS.key?(color_code)
    "\033[3#{COLORS[color_code]}m#{text}\033[0m"
  else
    "#{color_code}#{text}\033[0m"
  end
end

# Prepends the escape-sequence style_code to the text and appends the
# reset-sequence.
def style(text, style_code)
  "#{style_code}#{text}\033[0m"
end

# a function which allows to manipulate every known aspect of the ansi-output.
# Colors are names from COLORS, style and mode are names from STYLES (Symbols
# or Strings). Unknown names fall back to the defaults (:default, :regular,
# :neutral) instead of raising a TypeError.
def colored_output(output_text, fg_color = :default, bg_color = :default, style = :regular , mode = :neutral )
  format("\033[%i;%i;%i%i;%i%im%s\033[0m",
         STYLES.fetch(mode.to_sym, NEUTRAL),
         STYLES.fetch(style.to_sym, REGULAR),
         FG, COLORS.fetch(fg_color.to_sym, COLORS[:default]),
         BG, COLORS.fetch(bg_color.to_sym, COLORS[:default]),
         output_text)
end

# convenience functions
def red(text); colorize(text, "\033[31m"); end
def green(text); colorize(text, "\033[32m"); end
def yellow(text); colorize(text, "\033[33m"); end
def purple(text); colorize(text, "\033[35m"); end
def cyan(text); colorize(text, "\033[36m"); end
def blue(text); colorize(text, "\033[34m"); end
def white(text); colorize(text, "\033[37m"); end

def black_on_white(text); colorize(colorize(text, "\033[30m"), "\033[47m"); end
def white_on_black(text); colorize(colorize(text, "\033[37m"), "\033[40m"); end

def bold(text); style(text, "\033[01m"); end
def underline(text); style(text, "\033[04m"); end
65
+
@@ -0,0 +1,143 @@
1
+ #encoding: UTF-8
2
+ =begin
3
+ /***************************************************************************
4
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
5
+ * This program is free software; you can redistribute it and/or modify *
6
+ * it under the terms of the WTFPL 2.0 or later, see *
7
+ * http://www.wtfpl.net/about/ *
8
+ * *
9
+ * This program is distributed in the hope that it will be useful, *
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
12
+ * *
13
+ ***************************************************************************/
14
+ =end
15
+
16
+ require_relative 'basic_logging'
17
+ require_relative 'version'
18
+ require 'yaml'
19
+ require 'singleton'
20
+
21
# An object of this class represents the configuration of the program.
# The parameters are set in the configuration-file
#
# The user-configuration is the file ~/.<PROGNAME>.conf. If it does not
# exist, it is created as a copy of the installed configuration, which
# resides in the directory of this source-file.
# Options are read by calling a method with the name of the option, e.g.
# Configuration.instance.LOG_LEVEL; unknown options yield nil.
class Configuration
  include Singleton
  include BasicLogging

  # Locates (and, if need be, creates) the user-configuration, reads it and
  # updates it, when it stems from an older version of the program.
  def initialize
    debug 'Configuration::initialize()'
    # PROGNAME itself must not be modified.
    confname = "#{PROGNAME}.conf"
    # try to open user-configuration
    @config_file = File.join(ENV['HOME'] || Dir.home, ".#{confname}")
    # if user-configuration does not exist, copy installed version.
    installed_config = File.join(File.dirname(File.absolute_path(__FILE__)), confname)
    debug "installed configuration is in #{installed_config}"
    unless File.exist?(@config_file)
      begin
        File.write(@config_file, File.read(installed_config))
      rescue StandardError => ex
        STDERR.puts("Cannot write user-configuration to #{@config_file}! (#{ex.message})")
      end
    end
    # read the config-file.
    read_config
    # updates the user-configuration *ONLY*
    # if necessary
    update_config(installed_config)
  end

  # What this object does not have, may still be in the configuration.
  # Returns the value of the option named like the called method, or nil.
  def method_missing(method, args = nil)
    if @conf
      v = @conf[method]
      debug("method_missing returns value for #{method}: |#{v}|")
      v
    else
      error("user-version of the configuration (#{@conf}) is not accessible")
    end
  end

  # Configuration options count as methods of this object.
  def respond_to_missing?(method, include_private = false)
    (@conf.respond_to?(:key?) && @conf.key?(method)) || super
  end

  # set a configuration option.
  # The key may be given as String or Symbol; it is stored as Symbol, like
  # the keys read from the configuration-file. Setting DEBUG_LOG also
  # switches the log target.
  def set(key, value)
    return unless @conf

    key = key.to_sym if key.respond_to?(:to_sym)
    @conf[key] = value
    set_target(value) if key == :DEBUG_LOG
  end

  attr_reader :log_target, :log_level

  private

  # updates the configuration file, if need be.
  # When the user-configuration has no version or an older one than the
  # program, options which are only found in the installed configuration
  # (i_config) are added with an empty value. The previous file is saved as
  # bak_<PROGNAME>.conf_<old version> in the same directory.
  def update_config(i_config)
    unless @conf
      error "User-version of the configuration (#{@conf}) inaccessibble."
      exit false
    end

    conf_version = @conf[:CONFIG_VERSION]
    # A missing version counts as 0.0; to_f also copes with a version which
    # has been written as a String.
    return unless conf_version.to_f < PROGVERSION.to_f

    info 'configuration has an older version number, looking for changes'
    i_conf = YAML.load_file(i_config)
    i_conf.transform_keys!(&:to_sym)
    i_conf.each_key do |k|
      # Only options without any value are new. An option which is set to
      # false must be kept, as the empty String would count as true.
      next unless @conf[k].nil?

      info("new configuration option: #{k}")
      @conf[k] = ''
    end
    @conf[:CONFIG_VERSION] = PROGVERSION.to_f

    bak_conf = File.join(File.dirname(@config_file), "bak_#{PROGNAME}.conf_#{conf_version}")
    info("Old configuration is saved to #{bak_conf}")
    info("New configuration is saved to #{@config_file}")
    begin
      File.write(bak_conf, File.read(@config_file))
      # Write String keys, as in the installed configuration-file.
      File.write(@config_file, @conf.transform_keys(&:to_s).to_yaml)
    rescue StandardError => ex
      msg = "Cannot write altered configuration to #{@config_file}!\n\t#{ex.message}"
      msg << "\nAborting, bye\n"
      STDERR.puts(msg)
      error msg
      exit false
    end
  end

  # reads the configuration and sets some properties.
  # Log level and log target are taken from the options LOG_LEVEL and
  # DEBUG_LOG. The program is terminated, if the file cannot be used.
  def read_config
    unless File.exist?(@config_file) && File.readable?(@config_file)
      STDERR.puts("Cannot read the configuration-file (#{@config_file})")
      exit false
    end

    begin
      @conf = YAML.load_file(@config_file)
      @conf.transform_keys!(&:to_sym)
      @keys = @conf.keys
      set_level @conf[:LOG_LEVEL]
      target_string = @conf[:DEBUG_LOG]
      set_target(target_string == 'STDOUT' ? STDOUT : target_string)
      # clear the log file.
      clear_log
      debug("log target and -level set: #{@@log_level}, #{target_string}")
    rescue StandardError => ex
      # No log.
      msg = "Cannot use the configuration-file (#{@config_file})\n\t#{ex.message}"
      STDERR.puts("#{msg}\nAborting, bye\n")
      exit false
    end
  end
end
142
+ # EOF
143
+