flnews_post_proc 1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
1
+ #/***************************************************************************
2
+ # * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
3
+ # * *
4
+ # * This program is free software; you can redistribute it and/or modify *
5
+ # * it under the terms of the DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE *
6
+ # * *
7
+ # * This program is distributed in the hope that it will be useful, *
8
+ # * but WITHOUT ANY WARRANTY; without even the implied warranty of *
9
+ # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
10
+ # * *
11
+ # ***************************************************************************/
12
+ # This is a YAML file. Keep intact these three dashes.
13
+ ---
14
+ # CONFIG_VERSION – DO NOT MODIFY
15
+ # If this version is smaller than that of the current program, this file
16
+ # may be updated automatically. Your settings should not be altered in
17
+ # the process, if they are still valid.
18
+ # ATTN! COMMENTS WILL BE REMOVED but a copy of your previous configuration
19
+ # will be saved in a file with a version suffix.
20
+ CONFIG_VERSION: 1.35
21
+
22
+ # FUP_NAME
23
+ # A Regular Expression, describing the string which contains the name of
24
+ # previous poster who is the author of a quoted post. This string is
25
+ # recognized in the original article and may be used with the fitting element
26
+ # from GROUP_INTRO, below. The Regexp-format is that of the Regexp class in
27
+ # Ruby, noted as a String. Beware to mask a backslash '\' by another one,
28
+ # like in the example. A capture-group '()' serves to extract the name from the
29
+ # match result.
30
+ # Leave this field empty to keep the default from the FLNews configuration
31
+ # intact.
32
+ # CONTENT: A String equivalent of a regular expression.
33
+ # DEFAULT: EMPTY
34
+ # EXAMPLE1: "Am \\d+.\\d+.\\d{2,4} um \\d+:\\d+ schrieb (.*):"
35
+ # EXAMPLE2: "(.*) wrote:"
36
+ FUP_NAME: "(.*) wrote in"
37
+
38
+ #FUP_GROUP
39
+ # A Regular Expression, describing the string which contains the newsgroup
40
+ # where the previous post, that you are referring to in the followup, had been
41
+ # published.
42
+ # Leave this field empty to ignore the precise group.
43
+ # CONTENT: A String equivalent of a regular expression.
44
+ # DEFAULT: EMPTY
45
+ # EXAMPLE: "wrote in (.*):"
46
+ FUP_GROUP: 'wrote in (.*)'
47
+
48
+ # GROUP_INTROS:
49
+ # Introductory strings, referring to the previous poster who is the author of a
50
+ # quoted post. If you match the newsgroup of the post (see FUP_GROUP), you can
51
+ # use these variables in the result.
52
+ # Currently only %fup_name% and %fup_group% are reproduced in the resulting
53
+ # introductory string.
54
+ # CONTENT: A newsgroup or regexp, followed by a colon, a space and a String.
55
+ # DEFAULT: As configured in FLNews
56
+ # EXAMPLE: alt.test: "Thus spoke %fup_name% in %fup_group%:"
57
+ GROUP_INTROS:
58
+ .*fr.test: "(%fup_name%) a écris :"
59
+ .*de.test: "%fup_name% hat in %fup_group% getestet:"
60
+ de\.*: "%fup_name% hat geschrieben:"
61
+ uk\.*: "%fup_name% wrote:"
62
+ fr\.*: "%fup_name% a écrit:"
63
+
64
+ # GROUP_SIGS
65
+ # A signature line per Newsgroup.
66
+ # ATTN! In multi line signatures, use \r\n for line breaks!
67
+ # CONTENT: A newsgroup or regexp, followed by a colon, a space and a String.
68
+ # DEFAULT: As configured in flnews
69
+ # EXAMPLE: alt.test: "Signature for alt.test\r\nsecond line"
70
+ GROUP_SIGS:
71
+ .*de.test: "newsgroup_hook .*de.test\r\nMit Zeilenumbruch"
72
+ .*fr.test: "newsgroup_hook .*fr.test\r\n2ème ligne, « guillemets »"
73
+ de.*: "„Erst betonieren, dann reinwerfen.”\r\n– „Erst Schulden machen, dann klugscheißen?”"
74
+ fr.*: "« Ton ordinateur de rêve ? » – « Le bleue. »\r\n "
75
+
76
+ # CUSTOM_HEADERS
77
+ # Additional headers for the outgoing article.
78
+ # If you wish to insert an "X-Post-Processor" header, the current version
79
+ # of the program will be appended.
80
+ # CONTENT: A dash and space, then a String, comprising the name of the header, ending in a
81
+ # colon and the value of the header
82
+ # DEFAULT: undefined
83
+ # EXAMPLE (two headers):
84
+ # - 'X-My-Header: nothing fancy'
85
+ # - 'X-Post-Processor: flnews_post_proc'
86
+ CUSTOM_HEADERS:
87
+ - "X-Post-Processor: flnews_post_proc"
88
+
89
+ # NO_ARCHIVE_GROUPS:
90
+ # The newsgroups, where a header X-No-Archive: YES shall be set.
91
+ # CONTENT: a dash and space, then a String, containing the name of the group
92
+ # or a regexp.
93
+ # DEFAULT: empty
94
+ # EXAMPLE: - "alt.test"
95
+ NO_ARCHIVE_GROUPS:
96
+ - ".*.test"
97
+
98
+ # DEBUG_LOG:
99
+ # The name of a file, where debug messages are written. Setting this
100
+ # variable will enable the log. Leave empty to disable logging.
101
+ # CONTENT: The name of a writable file, which will be overwritten.
102
+ # DEFAULT: empty
103
+ # EXAMPLE: '/tmp/a_log-file.txt'
104
+ DEBUG_LOG: '/tmp/flnews_post_proc.log'
105
+
106
+ # LOG LEVEL
107
+ # One of debug, fatal, error, info, warn
108
+ LOG_LEVEL: 'info'
109
+
110
+ # REFERENCES_SEPARATOR
111
+ # A symbol or sequence of symbols marking the end of the message-body and the
112
+ # beginning of a list of “references” or “footnotes”. It will only appear, if
113
+ # the original message-body contains text marked for use as such a footnote. See
114
+ # REFERENCES_DELIMITER.
115
+ # If the option is not defined or empty, the list of footnotes will appear
116
+ # below the last line of the message body and no separator will be inserted.
117
+ # CONTENT: A quoted symbol or sequence of symbols.
118
+ # DEFAULT: empty
119
+ # EXAMPLE: '---------'
120
+ # REFERENCES_SEPARATOR: "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
121
+ REFERENCES_SEPARATOR: "──────────────────"
122
+
123
+ # REFERENCES_DELIMITER
124
+ # A symbol or sequence of symbols marking the beginning of a text which will
125
+ # serve as footnote (or reference). The reversed sequence must be used to mark
126
+ # the end of the text. The presence of this sequence or symbol in the original
127
+ # message body will cause the enclosed text to be moved below the message body.
128
+ # The REFERENCES_SEPARATOR, if defined, will separate the message from the list
129
+ # of footnotes.
130
+ # If this option is not defined or empty, footnotes are not created.
131
+ # CONTENT a quoted symbol or sequence of symbols.
132
+ # DEFAULT: none/empty
133
+ # EXAMPLE: '%?'
134
+ REFERENCES_DELIMITER: "%="
135
+
136
+ # REFERENCE_FORMAT
137
+ # A format-string, using %s for a number, replacing the reference-
138
+ # text in the message body.
139
+ # DEFAULT: " %s]" -> becomes 1) ... 2) ... 3)
140
+ # EXAMPLE: "(%s)" -> becomes (1) ... (2) ... (3)
141
+ REFERENCE_FORMAT: " ➤%s"
142
+
143
+ # VFY_URLS
144
+ # A Boolean constant. It determines if the program shall verify and possibly
145
+ # try to correct URLs. Even if URLs are identified as such, only a few
146
+ # manipulations are attempted :
147
+ # * Angular brackets '<' and '>' are added, if missing
148
+ # -----> handling <news:/> is disabled
149
+ # * Article-references are prepended with "news:", if missing
150
+ # * Slashes are added, if they are found missing after "http(s):"
151
+ # ATTN! The program is unable to discern "mailto:" and "news:" references. If
152
+ # neither is given, but '@' is present, "news:" is automatically prepended.
153
+ # <-----
154
+ # If the variable is not set, a value 'yes' is assumed.
155
+ # CONTENT: One of YES, yes, NO, no, and other variations of case.
156
+ # DEFAULT: No
157
+ # Example: ... I let you guess.
158
+ VFY_URLS: No
159
+
160
+ # OVERRIDE_CONFIG
161
+ # A Boolean constant. You can choose to override the following
162
+ # configuration options before an article is posted:
163
+ # GROUP_SIGS, XNAY_GROUPS, CUSTOM_HEADERS and DEBUG_LOG.
164
+ # A dialog may be displayed which allows you to disable any of these
165
+ # four options, so that the defaults from flnews prevail.
166
+ # A fifth option (PP) can be used to disable post-processing completely.
167
+ #
168
+ # Set this option to no, NO or similar to disable the dialog.
169
+ # DEFAULT: yes
170
+ OVERRIDE_CONFIG: YES
171
+
172
+ # EOF
173
+
@@ -0,0 +1,72 @@
1
+ #encoding: UTF-8
2
+ =begin
3
+ /***************************************************************************
4
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
5
+ * This program is free software; you can redistribute it and/or modify *
6
+ * it under the terms of the WTFPL 2.0 or later, see *
7
+ * http://www.wtfpl.net/about/ *
8
+ * *
9
+ * This program is distributed in the hope that it will be useful, *
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
12
+ * *
13
+ ***************************************************************************/
14
+ =end
15
+
16
+ require 'yaml'
17
+ require 'ostruct'
18
+
19
+ # --needs the Diffy gem
20
+ require 'diffy'
21
+
22
+ require_relative 'configuration'
23
+ require_relative 'basic_logging'
24
+ require_relative 'headers'
25
+ require_relative 'body'
26
+
27
+ # line-break in the final article
28
+ $LN = "\r\n"
29
+
30
# The main application class.
# Creating an instance post-processes one article; the result can be read
# from #article afterwards.
class PostProcessor
  include BasicLogging

  # The post-processed article: the joined headers, a line-break and the
  # joined body.
  attr_reader :article

  # Splits +article_text+ into headers and body, applies all modifications
  # and assembles the final article.
  def initialize(article_text)
    @config = Configuration.instance

    # Separate the headers and the body.
    debug ' initializing headers'
    headers = Headers.new(article_text)
    debug('headers is ' + headers.inspect)
    body = Body.new(article_text)

    debug('calling headers.update')
    headers.update

    groups = headers.newsgroups

    # The order of the following steps matters: each one works on the
    # result of the previous one.
    body.set_intro(groups.intro)
    # Extract references and footnotes, if there are any.
    body.handle_references
    body.set_signature(groups.signature)
    # Verify and possibly correct URIs.
    body.handle_uris

    # Headers and body as one string.
    @article = headers.join << $LN << body.join

    log_changes(article_text)
  end

  private

  # Writes a diff between +original_text+ and the processed article to the
  # log, at info level.
  def log_changes(original_text)
    rule = '–' * 20
    diff = Diffy::Diff.new(original_text, @article, context: 2).to_s
    info("\n" + rule + "\nDiffs\n" + rule + "\n" + diff)
  end
end
72
+ # EOF
data/lib/headers.rb ADDED
@@ -0,0 +1,172 @@
1
+ #encoding: UTF-8
2
+ =begin
3
+ /***************************************************************************
4
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
5
+ * This program is free software; you can redistribute it and/or modify *
6
+ * it under the terms of the WTFPL 2.0 or later, see *
7
+ * http://www.wtfpl.net/about/ *
8
+ * *
9
+ * This program is distributed in the hope that it will be useful, *
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
12
+ * *
13
+ ***************************************************************************/
14
+ =end
15
+
16
+ require_relative 'basic_logging'
17
+ require_relative 'configuration'
18
+ require_relative 'newsgroups'
19
+
20
# An object of this class represents the headers of a news-article.
#
# The header lines are taken from the raw article text (everything before
# the first empty line), wrapped lines are appended to the header they
# belong to, and the result is kept in a Hash which maps the header names
# (Symbols) to their values (Strings).
class Headers
  include BasicLogging

  # lines::      the raw header lines, as found in the article.
  # newsgroups:: the Newsgroups object created from the Newsgroups header.
  attr_reader :lines, :newsgroups

  # Reads the headers from +article_text+.
  # Terminates the program (exit false) if the input does not look like a
  # news-article or if the headers cannot be parsed.
  def initialize(article_text)
    @config = Configuration.instance
    debug('before split, article_text is : ' + article_text)
    # transform the article to an array.
    line_array = article_text.split($LN)

    # Ensure that all three headers are present.
    missing_header = ['From:', 'Newsgroups:', 'Message-ID:'].find do |h|
      line_array.none? { |l| l.include?(h) }
    end
    if missing_header
      abort_with("Input does not look like a news-article, no #{missing_header.delete(':')}; aborting.")
    end

    debug('after split, line_array is : ' + line_array.inspect)
    # The first empty line marks the end of the headers.
    end_index = line_array.index { |ele| ele.strip.empty? }
    unless end_index
      abort_with('ERROR: cannot split the input into lines: ' + self.class.name +
                 ': no empty line found after the headers')
    end
    # keep the preceding lines.
    @lines = line_array.slice!(0, end_index)
    debug('headers: ' + @lines.to_s)

    # headername: headervalue
    @headers = parse_header_lines(@lines)
    debug('headers are ' + @headers.to_s)

    # The check above looks at all lines of the article. Make sure that
    # the Newsgroups header has really been found among the headers.
    unless header(:Newsgroups)
      abort_with('Input does not look like a news-article, no Newsgroups; aborting.')
    end
    @newsgroups = Newsgroups.new(header(:Newsgroups))
    debug('Newsgroups is ' + @newsgroups.inspect)
  end

  # Returns the value of the header +name+ or nil, if there is no such
  # header. +name+ may be a Symbol or a String.
  def header(name)
    if name.respond_to?(:to_sym)
      # The keys of the Hash are Symbols.
      @headers[name.to_sym]
    else
      error(name.to_s + ' is not a symbol!')
      nil
    end
  end

  # Modifies the headers, if need be: sets the no-archive headers when the
  # Newsgroups object asks for them and adds the custom headers from the
  # configuration.
  def update
    no_archive = @newsgroups.no_archive
    debug('no_archive should be set now : ' + no_archive.to_s)
    if no_archive
      @headers[:'X-No-Archive'] = no_archive
      @headers[:Archive] = 'no'
    end

    custom_headers = @config.CUSTOM_HEADERS
    if custom_headers
      debug('setting custom headers : ' + custom_headers.inspect)
      custom_headers.each { |pair| add_custom_header(pair) }
      @headers.compact!
    end
    debug('updated headers are ' + @headers.inspect)
  end

  # Removes the header +name+.
  def remove(name)
    @headers.delete(name) if @headers[name]
  end

  # Basically a replacement for header(name), above.
  # But you can call self.Newsgroups or self.From etc.
  # An unknown name is logged as an error.
  def method_missing(method, args = nil)
    return @headers[method] if @headers[method]

    error("unknown symbol '#{method}'")
  end

  # Keeps respond_to? in line with method_missing: true for the names of
  # the headers which are currently set.
  def respond_to_missing?(method, include_private = false)
    (@headers && @headers.key?(method)) || super
  end

  # Returns the headers as a String, one "Name: value" per line.
  def join
    htext = @headers.map { |name, value| "#{name}: #{value}#{$LN}" }.join
    debug('joined headers: ' + htext)
    htext
  end

  private

  # Writes +msg+ to STDERR and to the log, then terminates the program.
  def abort_with(msg)
    # console
    STDERR.puts msg
    # log
    error(msg)
    exit false
  end

  # Builds the Hash of headers from the header lines. A line which begins
  # with whitespace is the continuation of a wrapped header and is appended,
  # unchanged, to the value of the preceding header.
  def parse_header_lines(lines)
    headers = {}
    cur_header = nil
    lines.each do |l|
      if l.start_with?(/\s+/)
        # A wrapped value needs a header to belong to.
        unless cur_header
          warn "Continuation line without a preceding header is ignored: #{l}"
          next
        end
        val = l
      else
        # The header name is all before the first colon ...
        name = l.match(/^(.*?):/)
        unless name
          error('Cannot match a header in line ' + l + '(no colon found)')
          exit false
        end
        cur_header = name[1].to_sym
        # ... the value is all after the first colon. Only the left side
        # is stripped (BUGFIX 3/2024).
        val = l.match(/:(.*)/)[1].lstrip
      end

      if headers[cur_header]
        # add the value to the existing one
        headers[cur_header] += val
      else
        # or add a new value
        headers[cur_header] = val
      end
    end
    headers
  end

  # Adds one custom header, given as "Name: value". Only the first colon
  # separates name and value, so that the value may contain colons itself.
  # Entries without a name or value and entries with non-ASCII characters
  # are ignored with a warning.
  def add_custom_header(pair)
    hn, hv = pair.split(':', 2).map(&:strip)
    if hn.nil? || hn.empty? || hv.nil? || hv.empty?
      warn "Custom header [#{pair}] is not of the form 'Name: value'! Header is ignored!"
      return
    end

    # Ensure header is ascii only
    unless hv.ascii_only? && hn.ascii_only?
      warn "Custom header [#{hn}:#{hv}] should be ASCII only! Header is ignored!"
      return
    end

    # Special treatment Post-Processor: append the program version.
    hv = "#{hv} #{PROGVERSION}" if hn == 'X-Post-Processor' && hv == 'flnews_post_proc'
    @headers[hn.to_sym] = hv
  end
end
172
+ # EOF
data/lib/newsgroups.rb ADDED
@@ -0,0 +1,152 @@
1
+ #encoding: UTF-8
2
+ =begin
3
+ /***************************************************************************
4
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
5
+ * This program is free software; you can redistribute it and/or modify *
6
+ * it under the terms of the WTFPL 2.0 or later, see *
7
+ * http://www.wtfpl.net/about/ *
8
+ * *
9
+ * This program is distributed in the hope that it will be useful, *
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
12
+ * *
13
+ ***************************************************************************/
14
+ =end
15
+
16
+ # An object of this class concentrates the specificities of chosen
17
+ # newsgroups, as defined in the configuration.
18
+
19
+ require_relative 'configuration'
20
+ require_relative 'basic_logging'
21
+
22
# Resolves, for the newsgroup of an article, the introductory line, the
# signature and the no-archive flag from the configuration.
# These details are only determined if the article is posted to exactly
# one newsgroup.
class Newsgroups
  include BasicLogging

  # signature:: the signature for the group or nil.
  # intro::     the introductory line for the group or nil.
  # groups::    the names of the newsgroups, as an Array of Strings.
  attr_reader :signature, :intro, :groups

  # +groups+ is the value of the Newsgroups header, i.e. a comma-separated
  # list of group names. Whitespace around the names is removed.
  def initialize(groups)
    @config = Configuration.instance
    @groups = groups.to_s.split(',').map(&:strip).reject(&:empty?)
    @signature = @intro = @no_archive = nil
    debug('set signature, intro, no_archive')
    # set details for this post, for one single group only.
    return unless @groups.size == 1

    set_signature
    set_intro
    set_no_archive
  end

  # Returns 'yes' if the no-archive headers shall be set, else nil.
  def no_archive
    debug('returning ' + (@no_archive || ' nil '))
    @no_archive
  end

  private

  # Returns true if +group+ matches the regular expression +pattern+
  # (a String). An invalid expression is reported as a warning and
  # counts as "no match".
  def group_matches?(group, pattern)
    group.match?(Regexp.new(pattern.to_s))
  rescue RegexpError => e
    warn("Invalid regular expression '#{pattern}' in the configuration: #{e.message}")
    false
  end

  # Finds the entry for +group+ in the Hash +table+: either by the name of
  # the group or else the first entry with a value whose key, taken as
  # regular expression, matches the group.
  # Returns [key, value] or nil.
  def find_entry(table, group)
    return [group, table[group]] if table.key?(group)

    table.find { |pattern, value| value && group_matches?(group, pattern) }
  end

  # defines the intro-line as per group.
  def set_intro
    @intro = nil
    # all configured intro-lines.
    gintros = @config.GROUP_INTROS
    unless gintros && gintros.respond_to?(:to_hash)
      warn("Cannot set the introduction line from the configuration!\nPlease verify that GROUP_INTROS is set")
      return
    end

    pattern, @intro = find_entry(gintros.to_hash, @groups.first)
    debug('matched group against ' + pattern.to_s) if @intro
    debug('group_intro is ' + @intro.to_s)
  end

  # Returns a copy of +text+ where each \n which does not follow a \r is
  # replaced by \r\n. If there are such line-breaks, a warning is written
  # and one blank directly after a line-break is removed, too.
  # +text+ itself is not modified.
  def correct_linebreaks(text)
    lonely_lf = /(?<!\r)\n/
    return text unless text.match?(lonely_lf)

    warn('ATTN! Line-breaks should be \\r\\n! Verify signatures!')
    # ... and silently marry them to \r
    text.gsub(lonely_lf, "\r\n").gsub("\n ", "\n")
  end

  # define the signature
  def set_signature
    @signature = nil
    gsigs = @config.GROUP_SIGS
    unless gsigs && gsigs.respond_to?(:to_hash)
      warn("Cannot read the signatures from the configuration.\nPlease verify that GROUP_SIGS is set.")
      return
    end

    pattern, @signature = find_entry(gsigs.to_hash, @groups.first)
    return unless @signature

    debug("signature for group(s) #{pattern}: #{@signature}")
    # A value which begins with a slash is the path to a file with
    # signatures.
    if @signature.strip.start_with?('/')
      debug('picking signature from file ' + @signature)
      @signature = pick_sig(@signature)
    end
    @signature = correct_linebreaks(@signature) if @signature
  end

  # Picks a random signature from +file+. The signatures in the file are
  # separated by an empty line.
  # Returns nil and logs an error if the file cannot be read.
  def pick_sig(file)
    path = file.to_s.strip
    unless File.file?(path) && File.readable?(path)
      error('cannot read signature from file ' + path)
      return nil
    end

    File.read(path).force_encoding('utf-8').split(/\r?\n\r?\n/).sample
  end

  # define the no_archive header.
  def set_no_archive
    @no_archive = nil
    xgs = Array(@config.NO_ARCHIVE_GROUPS)
    return unless xgs.any? { |g| group_matches?(@groups.first, g) }

    debug('setting no_archive')
    @no_archive = 'yes'
  end
end
152
+ # EOF