flnews_post_proc 1.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,173 @@
1
+ #/***************************************************************************
2
+ # * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
3
+ # * *
4
+ # * This program is free software; you can redistribute it and/or modify *
5
+ # * it under the terms of the DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE *
6
+ # * *
7
+ # * This program is distributed in the hope that it will be useful, *
8
+ # * but WITHOUT ANY WARRANTY; without even the implied warranty of *
9
+ # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
10
+ # * *
11
+ # ***************************************************************************/
12
+ # This is a YAML file. Keep intact these three dashes.
13
+ ---
14
+ # CONFIG_VERSION – DO NOT MODIFY
15
+ # If this version is smaller than that of the current program, this file
16
+ # may be updated automatically. Your settings should not be altered in
17
+ # the process, if they are still valid.
18
+ # ATTN! COMMENTS WILL BE REMOVED but a copy of your previous configuration
19
+ # will be saved in a file with a version suffix.
20
+ CONFIG_VERSION: 1.35
21
+
22
+ # FUP_NAME
23
+ # A Regular Expression, describing the string which contains the name of
24
+ # the previous poster who is the author of a quoted post. This string is
25
+ # recognized in the original article and may be used with the fitting element
26
+ # from GROUP_INTROS, below. The Regexp-format is that of the Regexp class in
27
+ # Ruby, noted as a String. Beware to mask a backslash '\' by another one,
28
+ # like in the example. A capture-group '()' serves to extract the name from the
29
+ # match result.
30
+ # Leave this field empty to keep the default from the FLNews configuration
31
+ # intact.
32
+ # CONTENT: A String equivalent of a regular expression.
33
+ # DEFAULT: EMPTY
34
+ # EXAMPLE1: "Am \\d+.\\d+.\\d{2,4} um \\d+:\\d+ schrieb (.*):"
35
+ # EXAMPLE2: "(.*) wrote:"
36
+ FUP_NAME: "(.*) wrote in"
37
+
38
+ #FUP_GROUP
39
+ # A Regular Expression, describing the string which contains the newsgroup
40
+ # where the previous post, that you are referring to in the followup, had been
41
+ # published.
42
+ # Leave this field empty to ignore the precise group.
43
+ # CONTENT: A String equivalent of a regular expression.
44
+ # DEFAULT: EMPTY
45
+ # EXAMPLE: "wrote in (.*):"
46
+ FUP_GROUP: 'wrote in (.*)'
47
+
48
+ # GROUP_INTROS:
49
+ # Introductory strings, referring to the previous poster who is the author of a
50
+ # quoted post. If you match the newsgroup of the post (see FUP_GROUP), you can
51
+ # use these variables in the result.
52
+ # Currently only %fup_name% and %fup_group% are reproduced in the resulting
53
+ # introductory string.
54
+ # CONTENT: A newsgroup or regexp, followed by a colon, a space and a String.
55
+ # DEFAULT: As configured in FLNews
56
+ # EXAMPLE: alt.test: "Thus spoke %fup_name% in %fup_group%:"
57
+ GROUP_INTROS:
58
+ .*fr.test: "(%fup_name%) a écris :"
59
+ .*de.test: "%fup_name% hat in %fup_group% getestet:"
60
+ de\.*: "%fup_name% hat geschrieben:"
61
+ uk\.*: "%fup_name% wrote:"
62
+ fr\.*: "%fup_name% a écrit:"
63
+
64
+ # GROUP_SIGS
65
+ # A signature line per Newsgroup.
66
+ # ATTN! In multi line signatures, use \r\n for line breaks!
67
+ # CONTENT: A newsgroup or regexp, followed by a colon, a space and a String.
68
+ # DEFAULT: As configured in flnews
69
+ # EXAMPLE: alt.test: "Signature for alt.test\r\nsecond line"
70
+ GROUP_SIGS:
71
+ .*de.test: "newsgroup_hook .*de.test\r\nMit Zeilenumbruch"
72
+ .*fr.test: "newsgroup_hook .*fr.test\r\n2ème ligne, « guillemets »"
73
+ de.*: "„Erst betonieren, dann reinwerfen.”\r\n– „Erst Schulden machen, dann klugscheißen?”"
74
+ fr.*: "« Ton ordinateur de rêve ? » – « Le bleue. »\r\n "
75
+
76
+ # CUSTOM_HEADERS
77
+ # Additional headers for the outgoing article.
78
+ # If you wish to insert an "X-Post-Processor" header, the current version
79
+ # of the program will be appended.
80
+ # CONTENT: A dash and space, then a String, comprising the name of the header, ending in a
81
+ # colon and the value of the header
82
+ # DEFAULT: undefined
83
+ # EXAMPLE (two headers):
84
+ # - 'X-My-Header: nothing fancy'
85
+ # - 'X-Post-Processor: flnews_post_proc'
86
+ CUSTOM_HEADERS:
87
+ - "X-Post-Processor: flnews_post_proc"
88
+
89
+ # NO_ARCHIVE_GROUPS:
90
+ # The newsgroups, where a header X-No-Archive: YES shall be set.
91
+ # CONTENT: a dash and space, then a String, containing the name of the group
92
+ # or a regexp.
93
+ # DEFAULT: empty
94
+ # EXAMPLE: - "alt.test"
95
+ NO_ARCHIVE_GROUPS:
96
+ - ".*.test"
97
+
98
+ # DEBUG_LOG:
99
+ # The name of a file, where debug messages are written. Setting this
100
+ # variable will enable the log. Leave empty to disable logging.
101
+ # CONTENT: The name of a writable file, which will be overwritten.
102
+ # DEFAULT: empty
103
+ # EXAMPLE: '/tmp/a_log-file.txt'
104
+ DEBUG_LOG: '/tmp/flnews_post_proc.log'
105
+
106
+ # LOG LEVEL
107
+ # One of debug, fatal, error, info, warn
108
+ LOG_LEVEL: 'info'
109
+
110
+ # REFERENCES_SEPARATOR
111
+ # A symbol or sequence of symbols marking the end of the message-body and the
112
+ # beginning of a list of “references” or “footnotes”. It will only appear, if
113
+ # the original message-body contains text marked for use as such a footnote. See
114
+ # REFERENCES_DELIMITER.
115
+ # If the option is not defined or empty, the list of footnotes will appear
116
+ # below the last line of the message body and no separator will be inserted.
117
+ # CONTENT: A quoted symbol or sequence of symbols.
118
+ # DEFAULT: empty
119
+ # EXAMPLE: '---------'
120
+ # REFERENCES_SEPARATOR: "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
121
+ REFERENCES_SEPARATOR: "──────────────────"
122
+
123
+ # REFERENCES_DELIMITER
124
+ # A symbol or sequence of symbols marking the beginning of a text which will
125
+ # serve as footnote (or reference). The reversed sequence must be used to mark
126
+ # the end of the text. The presence of this sequence or symbol in the original
127
+ # message body will cause the enclosed text to be moved below the message body.
128
+ # The REFERENCES_SEPARATOR, if defined, will separate the message from the list
129
+ # of footnotes.
130
+ # If this option is not defined or empty, footnotes are not created.
131
+ # CONTENT a quoted symbol or sequence of symbols.
132
+ # DEFAULT: none/empty
133
+ # EXAMPLE: '%?'
134
+ REFERENCES_DELIMITER: "%="
135
+
136
+ # REFERENCE_FORMAT
137
+ # A format-string, using %s for a number, replacing the reference-
138
+ # text in the message body.
139
+ # DEFAULT: " %s]" -> becomes 1) ... 2) ... 3)
140
+ # EXAMPLE: "(%s)" -> becomes (1) ... (2) ... (3)
141
+ REFERENCE_FORMAT: " ➤%s"
142
+
143
+ # VFY_URLS
144
+ # A Boolean constant. It determines if the program shall verify and possibly
145
+ # try to correct URLs. Even if URLs are identified as such, only a few
146
+ # manipulations are attempted :
147
+ # * Angular brackets '<' and '>' are added, if missing
148
+ # -----> handling <news:/> is disabled
149
+ # * Article-references are prepended with "news:", if missing
150
+ # * Slashes are added, if they are found missing after "http(s):"
151
+ # ATTN! The program is unable to discern "mailto:" and "news:" references. If
152
+ # neither is given, but '@' is present, "news:" is automatically prepended.
153
+ # <-----
154
+ # If the variable is not set, a value 'yes' is assumed.
155
+ # CONTENT: One of YES, yes, NO, no, and other variations of case.
156
+ # DEFAULT: No
157
+ # Example: ... I let you guess.
158
+ VFY_URLS: No
159
+
160
+ # OVERRIDE_CONFIG
161
+ # A Boolean constant. You can choose to override the following
162
+ # configuration options before an article is posted:
163
+ # GROUP_SIGS, NO_ARCHIVE_GROUPS, CUSTOM_HEADERS and DEBUG_LOG.
164
+ # A dialog may be displayed which allows you to disable any of these
165
+ # four options, so that the defaults from flnews prevail.
166
+ # A fifth option (PP) can be used to disable post-processing completely.
167
+ #
168
+ # Set this option to no, NO or similar to disable the dialog.
169
+ # DEFAULT: yes
170
+ OVERRIDE_CONFIG: YES
171
+
172
+ # EOF
173
+
@@ -0,0 +1,72 @@
1
+ #encoding: UTF-8
2
+ =begin
3
+ /***************************************************************************
4
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
5
+ * This program is free software; you can redistribute it and/or modify *
6
+ * it under the terms of the WTFPL 2.0 or later, see *
7
+ * http://www.wtfpl.net/about/ *
8
+ * *
9
+ * This program is distributed in the hope that it will be useful, *
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
12
+ * *
13
+ ***************************************************************************/
14
+ =end
15
+
16
+ require 'yaml'
17
+ require 'ostruct'
18
+
19
+ # --needs the Diffy gem
20
+ require 'diffy'
21
+
22
+ require_relative 'configuration'
23
+ require_relative 'basic_logging'
24
+ require_relative 'headers'
25
+ require_relative 'body'
26
+
27
+ # Line terminator (CRLF): used to split the incoming article into lines and to join the final article.
28
+ $LN = "\r\n"
29
+
30
# The main application class.
# Creating an object runs the whole post-processing chain on one article;
# the processed article is then available through #article.
class PostProcessor
  include BasicLogging

  # The post-processed article (headers, separator line, body) as a String.
  attr_reader :article

  # article_text: the complete original article, headers and body.
  def initialize(article_text)
    @config = Configuration.instance

    # Split the article into its two parts, headers and body.
    debug(' initializing headers')
    hdrs = Headers.new(article_text)
    debug('headers is ' + hdrs.inspect)
    text = Body.new(article_text)

    debug('calling headers.update')
    hdrs.update

    groups = hdrs.newsgroups

    # The sequence of the following calls is significant: every step
    # operates on the intermediate result of the step before.
    text.set_intro(groups.intro)
    # Move references/footnotes below the body, if there are any.
    text.handle_references
    text.set_signature(groups.signature)
    # Check and possibly correct URIs (http(s) only, for now).
    text.handle_uris

    # Re-assemble: headers, an empty line, then the body.
    @article = hdrs.join << $LN << text.join

    # Log what has been changed in comparison to the original article.
    rule = "–" * 20
    changes = Diffy::Diff.new(article_text, @article, context: 2).to_s
    info("\n" + rule + "\nDiffs\n" + rule + "\n" + changes)
  end
end
72
+ # EOF
data/lib/headers.rb ADDED
@@ -0,0 +1,172 @@
1
+ #encoding: UTF-8
2
+ =begin
3
+ /***************************************************************************
4
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
5
+ * This program is free software; you can redistribute it and/or modify *
6
+ * it under the terms of the WTFPL 2.0 or later, see *
7
+ * http://www.wtfpl.net/about/ *
8
+ * *
9
+ * This program is distributed in the hope that it will be useful, *
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
12
+ * *
13
+ ***************************************************************************/
14
+ =end
15
+
16
+ require_relative 'basic_logging'
17
+ require_relative 'configuration'
18
+ require_relative 'newsgroups'
19
+
20
# An object of this class represents the headers of a news-article.
# It parses the header section of the raw article, allows to read, add and
# remove headers and renders them back into a String.
class Headers
  include BasicLogging

  # lines:      the raw header lines, as found in the article
  # newsgroups: the Newsgroups object built from the Newsgroups header
  attr_reader :lines, :newsgroups

  # Reads the headers from the article.
  #
  # article_text: the complete article as a String, lines separated by $LN.
  #
  # The program is terminated (exit false) when one of the headers From,
  # Newsgroups or Message-ID is missing, when no empty line terminates the
  # header section or when a header line does not contain a colon.
  def initialize(article_text)
    @config = Configuration.instance
    # transform the article to an array.
    debug('before split, article_text is : ' << article_text)
    line_array = article_text.split($LN)

    # Ensure that all three headers are present.
    missing_header = ['From:', 'Newsgroups:', 'Message-ID:'].find do |h|
      line_array.none? { |l| l.match?(h) }
    end
    if missing_header
      msg = "Input does not look like a news-article, no #{missing_header.delete(':')}; aborting."
      STDERR.puts msg
      error(msg)
      exit false
    end

    debug('after split, line_array is : ' << line_array.inspect)
    # The first empty line ends the header section; keep the preceding lines.
    end_index = line_array.index { |ele| ele.strip == '' }
    begin
      # raises a TypeError if there is no empty line (end_index is nil)
      @lines = line_array.slice!(0, end_index)
    rescue StandardError => ex
      msg = 'ERROR: cannot split the input into lines: ' << self.class.name << ': ' << ex.message
      # console
      STDERR.puts msg
      # log
      error(msg)
      exit false
    end

    debug('headers: ' << @lines.to_s)

    # headername (Symbol) => headervalue (String)
    @headers = parse_header_lines(@lines)
    debug('headers are ' << @headers.to_s)

    @newsgroups = Newsgroups.new(header(:Newsgroups))
    debug('Newsgroups is ' << @newsgroups.inspect)
  end

  # Returns the value of header 'name' or nil, if the header is not set.
  # name: a Symbol or anything which can be converted into one (e.g. a String).
  def header(name)
    if name.respond_to?(:to_sym)
      # convert, so that 'From' and :From find the same header
      @headers[name.to_sym]
    else
      error("#{name} is not a symbol!")
      nil
    end
  end

  # Modify headers, if need be: sets X-No-Archive/Archive for the newsgroups
  # concerned and adds the custom headers from the configuration.
  def update
    no_archive = @newsgroups.no_archive
    debug('no_archive should be set now : ' << no_archive.to_s)
    if no_archive
      @headers[:'X-No-Archive'] = no_archive
      @headers[:Archive] = 'no'
    end

    custom = @config.CUSTOM_HEADERS
    if custom
      debug('setting custom headers : ' << custom.inspect)
      custom.each do |pair|
        # Split at the first colon only, the value may contain colons, too.
        hn, hv = pair.split(':', 2).map(&:strip)
        if hn.nil? || hn.empty? || hv.nil? || hv.empty?
          warn "Custom header [#{pair}] is not of the form 'Name: value'! Header is ignored!"
          next
        end
        # Ensure header is ascii only
        if hv.ascii_only? && hn.ascii_only?
          # <---------- special treatment Post-Processor ---------->
          # the program version is appended to the bare program name
          hv = "#{hv} #{PROGVERSION}" if hn == 'X-Post-Processor' && hv == 'flnews_post_proc'
          # >----------<
          @headers[hn.to_sym] = hv
        else
          warn "Custom header [#{hn}:#{hv}] should be ASCII only! Header is ignored!"
        end
      end
      @headers.compact!
    end
    debug('updated headers are ' << @headers.inspect)
  end

  # Removes the header 'name'. Returns the removed value or nil.
  def remove(name)
    @headers.delete(name)
  end

  # basically a replacement for header(name), above.
  # But you can call self.Newsgroups or self.From etc.
  # An unknown header is reported via error().
  def method_missing(method, args = nil)
    return @headers[method] if @headers[method]
    error("unknown symbol '#{method}'")
  end

  # Keeps respond_to? in line with method_missing: true for each header
  # which is currently set.
  def respond_to_missing?(method, include_private = false)
    (@headers && @headers.key?(method)) || super
  end

  # return the headers as a String, each header terminated by $LN.
  def join
    htext = ''
    @headers.each_pair { |h, t| htext << h.to_s << ': ' << t << $LN }
    debug('joined headers: ' << htext)
    htext
  end

  private

  # Fills a Hash (headername => headervalue) from the header-lines.
  # Headers may have been line-wrapped: a line which begins with whitespace
  # continues the value of the preceding header.
  def parse_header_lines(lines)
    headers = {}
    cur_header = nil
    lines.each do |l|
      if l.start_with?(/\s+/)
        # A wrapped value is not divided and is appended unchanged.
        val = l
      else
        # The name is all before the first colon, the value all after it.
        parts = l.match(/^(.*?):(.*)/)
        unless parts
          error("Cannot match a header in line #{l} (no colon found)")
          exit false
        end
        cur_header = parts[1].to_sym
        # lstrip only: whitespace at the end of the value is kept
        val = parts[2].lstrip
      end
      # add the value to the existing one
      if cur_header && headers[cur_header]
        headers[cur_header] += val
      else
        # or add a new value
        headers[cur_header] = val
      end
    end
    headers
  end
end
172
+ # EOF
data/lib/newsgroups.rb ADDED
@@ -0,0 +1,152 @@
1
+ #encoding: UTF-8
2
+ =begin
3
+ /***************************************************************************
4
+ * 2023-2024, Michael Uplawski <michael.uplawski@uplawski.eu> *
5
+ * This program is free software; you can redistribute it and/or modify *
6
+ * it under the terms of the WTFPL 2.0 or later, see *
7
+ * http://www.wtfpl.net/about/ *
8
+ * *
9
+ * This program is distributed in the hope that it will be useful, *
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
12
+ * *
13
+ ***************************************************************************/
14
+ =end
15
+
16
+ # An object of this class concentrates the specificities of chosen
17
+ # newsgroups, as defined in the configuration.
18
+
19
+ require_relative 'configuration'
20
+ require_relative 'basic_logging'
21
+
22
# Concentrates the specificities of the newsgroup that an article is posted
# to, as defined in the configuration: introductory line, signature and the
# need for a no-archive header. These details are only determined if the
# article is posted to exactly one newsgroup.
class Newsgroups
  include BasicLogging

  # signature: the signature for the group or nil
  # intro:     the introductory line for the group or nil
  # groups:    the names of all newsgroups, as an Array of Strings
  attr_reader :signature, :intro, :groups

  # groups: the value of the Newsgroups header, group names separated by
  #         commas.
  def initialize(groups)
    @config = Configuration.instance
    # whitespace around the names is not part of the names
    @groups = groups.to_s.split(',').map(&:strip)
    debug('set signature, intro, no_archive')
    # set details for this post, but only for a single group
    return unless @groups.size == 1

    set_signature
    set_intro
    set_no_archive
  end

  # Returns 'yes', if the article shall not be archived, else nil.
  def no_archive
    debug('returning ' << (@no_archive || ' nil '))
    @no_archive || nil
  end

  private

  # defines the intro-line as per group.
  def set_intro
    @intro = nil
    # all configured intro-lines.
    gintros = @config.GROUP_INTROS

    unless gintros && gintros.respond_to?(:to_hash)
      msg = 'Cannot set the introduction line from the configuration!'
      msg << "\nPlease verify that GROUP_INTROS is set"
      warn(msg)
      return
    end

    # only one group.
    group = @groups[0]
    # find the intro for the group, either by name
    if gintros.key?(group)
      @intro = gintros[group]
    else
      # or by a regular expression.
      pattern, @intro = lookup_by_pattern(gintros, group)
      debug("matched group against #{pattern}") if @intro
    end
    debug('group_intro is ' << @intro.to_s)
  end

  # define the signature
  def set_signature
    @signature = nil
    gsigs = @config.GROUP_SIGS

    unless gsigs && gsigs.respond_to?(:to_hash)
      msg = "Cannot read the signatures from the configuration."
      msg << "\nPlease verify that GROUP_SIGS is set."
      warn(msg)
      return
    end

    # 1 group
    group = @groups[0]
    # find the signature for the group, either by name
    if gsigs.key?(group)
      @signature = gsigs[group]
      debug("signature is #{@signature}") if @signature
    else
      # .., or by applying a regexp.
      pattern, @signature = lookup_by_pattern(gsigs, group)
      debug("signature for group(s) #{pattern}: #{@signature}") if @signature
    end

    # An absolute path names a file to pick the signature from.
    if @signature && @signature.strip.start_with?('/')
      debug('picking signature from file ' << @signature)
      @signature = pick_sig(@signature.strip)
    end
    @signature = correct_linebreaks(@signature) if @signature
  end

  # define the no_archive header.
  def set_no_archive
    @no_archive = nil
    group = @groups[0]
    # Array() accepts a missing option and a single scalar value as well
    return unless Array(@config.NO_ARCHIVE_GROUPS).any? { |g| group_matches?(group, g) }

    debug("setting no_archive")
    @no_archive = 'yes'
  end

  # Returns the first pair [pattern, value] from table, where the pattern
  # matches the group and the value is set. Returns nil, if there is none.
  def lookup_by_pattern(table, group)
    table.find { |pattern, value| value && group_matches?(group, pattern) }
  end

  # true, if the regular expression 'pattern' matches the group name.
  # An invalid expression never matches and causes a warning.
  def group_matches?(group, pattern)
    group.match?(Regexp.new(pattern.to_s))
  rescue RegexpError => e
    warn("Ignoring invalid expression '#{pattern}' in the configuration: #{e.message}")
    false
  end

  # Replaces each \n which does not follow a \r by \r\n and returns the
  # result. A text without such line-breaks is returned as is; the text
  # given is never modified.
  def correct_linebreaks(text)
    lonely_lf = /(?<!\r)\n/
    return text unless text.match?(lonely_lf)

    warn("ATTN! Line-breaks should be \\r\\n! Verify signatures!")
    # ... and silently marry them to \r.
    # Luxury you can afford: one space directly after a line-break is removed.
    text.gsub(lonely_lf, "\r\n").gsub("\n ", "\n")
  end

  # Picks a random signature from a file, where the signatures are
  # separated by an empty line. Returns nil, if the file cannot be read or
  # does not contain any signature.
  def pick_sig(file)
    unless file && File.file?(file) && File.readable?(file)
      error('cannot read signature from file ' << file.to_s)
      return nil
    end

    all_sigs = File.read(file).force_encoding('utf-8').split("\n\n")
    all_sigs.sample
  end
end
152
+ # EOF