email_reply_trimmer 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +5 -0
- data/Gemfile.lock +98 -0
- data/README.md +4 -4
- data/Rakefile +4 -7
- data/devenv.lock +184 -0
- data/devenv.nix +4 -0
- data/devenv.yaml +8 -0
- data/email_reply_trimmer.gemspec +8 -2
- data/lib/email_reply_trimmer/delimiter_matcher.rb +3 -2
- data/lib/email_reply_trimmer/email_header_matcher.rb +21 -8
- data/lib/email_reply_trimmer/embedded_email_matcher.rb +53 -25
- data/lib/email_reply_trimmer/empty_line_matcher.rb +1 -0
- data/lib/email_reply_trimmer/quote_matcher.rb +1 -0
- data/lib/email_reply_trimmer/signature_matcher.rb +24 -9
- data/lib/email_reply_trimmer.rb +102 -37
- data/test/before/forwarded_apple.txt +1 -0
- data/test/before/forwarded_gmail.txt +1 -0
- data/test/elided/block_code_spacers.txt +0 -0
- data/test/elided/email_headers_5.txt +23 -0
- data/test/elided/embedded_ception.txt +3 -3
- data/test/elided/embedded_email_12.txt +2 -2
- data/test/elided/embedded_email_13.txt +9 -0
- data/test/elided/embedded_email_14.txt +11 -0
- data/test/elided/embedded_email_15.txt +4 -0
- data/test/elided/embedded_email_16.txt +4 -0
- data/test/elided/embedded_email_17.txt +2 -0
- data/test/elided/embedded_email_18.txt +1 -0
- data/test/elided/embedded_email_19.txt +0 -0
- data/test/elided/embedded_email_chinese.txt +4 -0
- data/test/elided/embedded_email_german_4.txt +15 -0
- data/test/elided/embedded_email_german_5.txt +20 -0
- data/test/elided/embedded_email_german_6.txt +8 -0
- data/test/elided/embedded_email_norwegian.txt +9 -0
- data/test/elided/embedded_email_polish_2.txt +7 -0
- data/test/elided/embedded_email_quote_text.txt +5 -0
- data/test/elided/embedded_email_russian_2.txt +23 -0
- data/test/elided/embedded_email_swedish.txt +8 -0
- data/test/elided/embedded_email_ukrainian.txt +17 -0
- data/test/elided/forwarded_apple.txt +15 -0
- data/test/elided/forwarded_gmail.txt +15 -0
- data/test/elided/signatures.txt +5 -0
- data/test/elided/spam_1.txt +75 -0
- data/test/elided/spam_2.txt +152 -0
- data/test/emails/block_code_spacers.txt +13 -0
- data/test/emails/email_headers_5.txt +37 -0
- data/test/emails/embedded_email_1.txt +1 -1
- data/test/emails/embedded_email_13.txt +14 -0
- data/test/emails/embedded_email_14.txt +16 -0
- data/test/emails/embedded_email_15.txt +9 -0
- data/test/emails/embedded_email_16.txt +16 -0
- data/test/emails/embedded_email_17.txt +38 -0
- data/test/emails/embedded_email_18.txt +7 -0
- data/test/emails/embedded_email_19.txt +13 -0
- data/test/emails/embedded_email_4.txt +13 -13
- data/test/emails/embedded_email_7.txt +4 -4
- data/test/emails/embedded_email_chinese.txt +7 -0
- data/test/emails/embedded_email_german_4.txt +18 -0
- data/test/emails/embedded_email_german_5.txt +23 -0
- data/test/emails/embedded_email_german_6.txt +14 -0
- data/test/emails/embedded_email_norwegian.txt +11 -0
- data/test/emails/embedded_email_polish_2.txt +11 -0
- data/test/emails/embedded_email_russian_2.txt +26 -0
- data/test/emails/embedded_email_swedish.txt +20 -0
- data/test/emails/embedded_email_ukrainian.txt +19 -0
- data/test/emails/forwarded_apple.txt +17 -0
- data/test/emails/forwarded_gmail.txt +17 -0
- data/test/emails/signatures.txt +5 -0
- data/test/emails/spam_1.txt +75 -0
- data/test/emails/spam_2.txt +174 -0
- data/test/embedded/forwarded_apple.txt +13 -0
- data/test/embedded/forwarded_gmail.txt +14 -0
- data/test/matchers/does_not_contain_embedded_email.txt +5 -0
- data/test/test_email_matcher.rb +16 -0
- data/test/test_email_reply_trimmer.rb +8 -3
- data/test/trimmed/block_code_spacers.txt +13 -0
- data/test/trimmed/email_headers_5.txt +11 -0
- data/test/trimmed/embedded_email_13.txt +3 -0
- data/test/trimmed/embedded_email_14.txt +3 -0
- data/test/trimmed/embedded_email_15.txt +3 -0
- data/test/trimmed/embedded_email_16.txt +11 -0
- data/test/trimmed/embedded_email_17.txt +35 -0
- data/test/trimmed/embedded_email_18.txt +5 -0
- data/test/trimmed/embedded_email_19.txt +13 -0
- data/test/trimmed/embedded_email_chinese.txt +2 -0
- data/test/trimmed/embedded_email_german_4.txt +1 -0
- data/test/trimmed/embedded_email_german_5.txt +1 -0
- data/test/trimmed/embedded_email_german_6.txt +4 -0
- data/test/trimmed/embedded_email_norwegian.txt +1 -0
- data/test/trimmed/embedded_email_polish_2.txt +2 -0
- data/test/trimmed/embedded_email_quote_text.txt +0 -5
- data/test/trimmed/embedded_email_russian_2.txt +1 -0
- data/test/trimmed/embedded_email_swedish.txt +9 -0
- data/test/trimmed/embedded_email_ukrainian.txt +1 -0
- data/test/trimmed/forwarded_apple.txt +1 -0
- data/test/trimmed/forwarded_gmail.txt +1 -0
- data/test/trimmed/spam_1.txt +0 -0
- data/test/trimmed/spam_2.txt +21 -0
- metadata +147 -15
- /data/test/elided/{embedded_email_polish.txt → embedded_email_polish_1.txt} +0 -0
- /data/test/elided/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
- /data/test/emails/{embedded_email_polish.txt → embedded_email_polish_1.txt} +0 -0
- /data/test/emails/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
- /data/test/trimmed/{embedded_email_polish.txt → embedded_email_polish_1.txt} +0 -0
- /data/test/trimmed/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9914eb793e4bbfa4f291097707acb2af27f491507b0e52b0e1433df3c1fb1f0d
|
4
|
+
data.tar.gz: 2c519cd4b6fe2db1dc3b12201c8a5bd540fbf274537610069b73d34bb3e0566f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0687f10183b5472c816d433c3f654dd085b8d615ef223c54e97e61e893c47752ed20eb5b1354c7d184ab84075de3c3fa492c76191b1fc0cfdcfc48a1d661b34
|
7
|
+
data.tar.gz: 3d740b872f6116bd0c59d2d1b19e405191f1c25c619e5564a602eebed3691323761292d7a063f7f224bacd668f2ede622eac085f7a5b7bad3b90570ac3b018b9
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
email_reply_trimmer (0.2.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
activesupport (8.0.0)
|
10
|
+
base64
|
11
|
+
benchmark (>= 0.3)
|
12
|
+
bigdecimal
|
13
|
+
concurrent-ruby (~> 1.0, >= 1.3.1)
|
14
|
+
connection_pool (>= 2.2.5)
|
15
|
+
drb
|
16
|
+
i18n (>= 1.6, < 2)
|
17
|
+
logger (>= 1.4.2)
|
18
|
+
minitest (>= 5.1)
|
19
|
+
securerandom (>= 0.3)
|
20
|
+
tzinfo (~> 2.0, >= 2.0.5)
|
21
|
+
uri (>= 0.13.1)
|
22
|
+
ast (2.4.2)
|
23
|
+
base64 (0.2.0)
|
24
|
+
benchmark (0.4.0)
|
25
|
+
bigdecimal (3.1.8)
|
26
|
+
concurrent-ruby (1.3.4)
|
27
|
+
connection_pool (2.4.1)
|
28
|
+
drb (2.2.1)
|
29
|
+
i18n (1.14.6)
|
30
|
+
concurrent-ruby (~> 1.0)
|
31
|
+
json (2.8.2)
|
32
|
+
language_server-protocol (3.17.0.3)
|
33
|
+
logger (1.6.1)
|
34
|
+
minitest (5.25.2)
|
35
|
+
parallel (1.26.3)
|
36
|
+
parser (3.3.6.0)
|
37
|
+
ast (~> 2.4.1)
|
38
|
+
racc
|
39
|
+
racc (1.8.1)
|
40
|
+
rack (3.1.8)
|
41
|
+
rainbow (3.1.1)
|
42
|
+
rake (12.3.3)
|
43
|
+
regexp_parser (2.9.2)
|
44
|
+
rubocop (1.69.0)
|
45
|
+
json (~> 2.3)
|
46
|
+
language_server-protocol (>= 3.17.0)
|
47
|
+
parallel (~> 1.10)
|
48
|
+
parser (>= 3.3.0.2)
|
49
|
+
rainbow (>= 2.2.2, < 4.0)
|
50
|
+
regexp_parser (>= 2.4, < 3.0)
|
51
|
+
rubocop-ast (>= 1.36.1, < 2.0)
|
52
|
+
ruby-progressbar (~> 1.7)
|
53
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
54
|
+
rubocop-ast (1.36.2)
|
55
|
+
parser (>= 3.3.1.0)
|
56
|
+
rubocop-capybara (2.21.0)
|
57
|
+
rubocop (~> 1.41)
|
58
|
+
rubocop-discourse (3.8.6)
|
59
|
+
activesupport (>= 6.1)
|
60
|
+
rubocop (>= 1.59.0)
|
61
|
+
rubocop-capybara (>= 2.0.0)
|
62
|
+
rubocop-factory_bot (>= 2.0.0)
|
63
|
+
rubocop-rails (>= 2.25.0)
|
64
|
+
rubocop-rspec (>= 3.0.1)
|
65
|
+
rubocop-rspec_rails (>= 2.30.0)
|
66
|
+
rubocop-factory_bot (2.26.1)
|
67
|
+
rubocop (~> 1.61)
|
68
|
+
rubocop-rails (2.27.0)
|
69
|
+
activesupport (>= 4.2.0)
|
70
|
+
rack (>= 1.1)
|
71
|
+
rubocop (>= 1.52.0, < 2.0)
|
72
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
73
|
+
rubocop-rspec (3.2.0)
|
74
|
+
rubocop (~> 1.61)
|
75
|
+
rubocop-rspec_rails (2.30.0)
|
76
|
+
rubocop (~> 1.61)
|
77
|
+
rubocop-rspec (~> 3, >= 3.0.1)
|
78
|
+
ruby-progressbar (1.13.0)
|
79
|
+
securerandom (0.3.2)
|
80
|
+
tzinfo (2.0.6)
|
81
|
+
concurrent-ruby (~> 1.0)
|
82
|
+
unicode-display_width (3.1.2)
|
83
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
84
|
+
unicode-emoji (4.0.4)
|
85
|
+
uri (1.0.2)
|
86
|
+
|
87
|
+
PLATFORMS
|
88
|
+
ruby
|
89
|
+
|
90
|
+
DEPENDENCIES
|
91
|
+
email_reply_trimmer!
|
92
|
+
minitest (~> 5)
|
93
|
+
rake (~> 12)
|
94
|
+
rubocop
|
95
|
+
rubocop-discourse
|
96
|
+
|
97
|
+
BUNDLED WITH
|
98
|
+
2.5.22
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Discourse Email Reply Trimmer
|
1
|
+
# Discourse Email Reply Trimmer
|
2
2
|
|
3
3
|
EmailReplyTrimmer is a small library to trim replies from plain text email.
|
4
4
|
|
@@ -16,6 +16,6 @@ Run `rake` to run the tests.
|
|
16
16
|
|
17
17
|
## Inspirations
|
18
18
|
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
- [GitHub's Email Reply Parser](https://github.com/github/email_reply_parser)
|
20
|
+
- [MailGun's Talon](https://github.com/mailgun/talon)
|
21
|
+
- [Vitor R. Carvalho's Learning to Extract Signature and Reply Lines from Email](http://www.cs.cmu.edu/~vitor/papers/sigFilePaper_finalversion.pdf)
|
data/Rakefile
CHANGED
@@ -1,12 +1,9 @@
|
|
1
|
-
|
2
|
-
@name ||= Dir["*.gemspec"].first.split(".").first
|
3
|
-
end
|
1
|
+
# frozen_string_literal: true
|
4
2
|
|
5
|
-
|
6
|
-
|
7
|
-
end
|
3
|
+
name = Dir["*.gemspec"].first.split(".").first
|
4
|
+
version = File.read("lib/#{name}.rb")[/^\s*VERSION\s*=\s*['"](?'version'\d+\.\d+\.\d+)['"]/, "version"]
|
8
5
|
|
9
|
-
task :
|
6
|
+
task default: :test
|
10
7
|
|
11
8
|
require "rake/testtask"
|
12
9
|
Rake::TestTask.new(:test)
|
data/devenv.lock
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
{
|
2
|
+
"nodes": {
|
3
|
+
"devenv": {
|
4
|
+
"locked": {
|
5
|
+
"dir": "src/modules",
|
6
|
+
"lastModified": 1732830318,
|
7
|
+
"owner": "cachix",
|
8
|
+
"repo": "devenv",
|
9
|
+
"rev": "51abcb75d471a215c800937d4e30dc765d305c6d",
|
10
|
+
"type": "github"
|
11
|
+
},
|
12
|
+
"original": {
|
13
|
+
"dir": "src/modules",
|
14
|
+
"owner": "cachix",
|
15
|
+
"repo": "devenv",
|
16
|
+
"type": "github"
|
17
|
+
}
|
18
|
+
},
|
19
|
+
"flake-compat": {
|
20
|
+
"flake": false,
|
21
|
+
"locked": {
|
22
|
+
"lastModified": 1732722421,
|
23
|
+
"owner": "edolstra",
|
24
|
+
"repo": "flake-compat",
|
25
|
+
"rev": "9ed2ac151eada2306ca8c418ebd97807bb08f6ac",
|
26
|
+
"type": "github"
|
27
|
+
},
|
28
|
+
"original": {
|
29
|
+
"owner": "edolstra",
|
30
|
+
"repo": "flake-compat",
|
31
|
+
"type": "github"
|
32
|
+
}
|
33
|
+
},
|
34
|
+
"flake-compat_2": {
|
35
|
+
"flake": false,
|
36
|
+
"locked": {
|
37
|
+
"lastModified": 1732722421,
|
38
|
+
"owner": "edolstra",
|
39
|
+
"repo": "flake-compat",
|
40
|
+
"rev": "9ed2ac151eada2306ca8c418ebd97807bb08f6ac",
|
41
|
+
"type": "github"
|
42
|
+
},
|
43
|
+
"original": {
|
44
|
+
"owner": "edolstra",
|
45
|
+
"repo": "flake-compat",
|
46
|
+
"type": "github"
|
47
|
+
}
|
48
|
+
},
|
49
|
+
"flake-utils": {
|
50
|
+
"inputs": {
|
51
|
+
"systems": "systems"
|
52
|
+
},
|
53
|
+
"locked": {
|
54
|
+
"lastModified": 1731533236,
|
55
|
+
"owner": "numtide",
|
56
|
+
"repo": "flake-utils",
|
57
|
+
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
58
|
+
"type": "github"
|
59
|
+
},
|
60
|
+
"original": {
|
61
|
+
"owner": "numtide",
|
62
|
+
"repo": "flake-utils",
|
63
|
+
"type": "github"
|
64
|
+
}
|
65
|
+
},
|
66
|
+
"gitignore": {
|
67
|
+
"inputs": {
|
68
|
+
"nixpkgs": [
|
69
|
+
"pre-commit-hooks",
|
70
|
+
"nixpkgs"
|
71
|
+
]
|
72
|
+
},
|
73
|
+
"locked": {
|
74
|
+
"lastModified": 1709087332,
|
75
|
+
"owner": "hercules-ci",
|
76
|
+
"repo": "gitignore.nix",
|
77
|
+
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
|
78
|
+
"type": "github"
|
79
|
+
},
|
80
|
+
"original": {
|
81
|
+
"owner": "hercules-ci",
|
82
|
+
"repo": "gitignore.nix",
|
83
|
+
"type": "github"
|
84
|
+
}
|
85
|
+
},
|
86
|
+
"nixpkgs": {
|
87
|
+
"locked": {
|
88
|
+
"lastModified": 1732617236,
|
89
|
+
"owner": "nixos",
|
90
|
+
"repo": "nixpkgs",
|
91
|
+
"rev": "af51545ec9a44eadf3fe3547610a5cdd882bc34e",
|
92
|
+
"type": "github"
|
93
|
+
},
|
94
|
+
"original": {
|
95
|
+
"owner": "nixos",
|
96
|
+
"ref": "nixpkgs-unstable",
|
97
|
+
"repo": "nixpkgs",
|
98
|
+
"type": "github"
|
99
|
+
}
|
100
|
+
},
|
101
|
+
"nixpkgs-ruby": {
|
102
|
+
"inputs": {
|
103
|
+
"flake-compat": "flake-compat",
|
104
|
+
"flake-utils": "flake-utils",
|
105
|
+
"nixpkgs": [
|
106
|
+
"nixpkgs"
|
107
|
+
]
|
108
|
+
},
|
109
|
+
"locked": {
|
110
|
+
"lastModified": 1730958464,
|
111
|
+
"owner": "bobvanderlinden",
|
112
|
+
"repo": "nixpkgs-ruby",
|
113
|
+
"rev": "93bd040be2856ba0e44a33db6360e8c9c0c09aa1",
|
114
|
+
"type": "github"
|
115
|
+
},
|
116
|
+
"original": {
|
117
|
+
"owner": "bobvanderlinden",
|
118
|
+
"repo": "nixpkgs-ruby",
|
119
|
+
"type": "github"
|
120
|
+
}
|
121
|
+
},
|
122
|
+
"nixpkgs-stable": {
|
123
|
+
"locked": {
|
124
|
+
"lastModified": 1732749044,
|
125
|
+
"owner": "NixOS",
|
126
|
+
"repo": "nixpkgs",
|
127
|
+
"rev": "0c5b4ecbed5b155b705336aa96d878e55acd8685",
|
128
|
+
"type": "github"
|
129
|
+
},
|
130
|
+
"original": {
|
131
|
+
"owner": "NixOS",
|
132
|
+
"ref": "nixos-24.05",
|
133
|
+
"repo": "nixpkgs",
|
134
|
+
"type": "github"
|
135
|
+
}
|
136
|
+
},
|
137
|
+
"pre-commit-hooks": {
|
138
|
+
"inputs": {
|
139
|
+
"flake-compat": "flake-compat_2",
|
140
|
+
"gitignore": "gitignore",
|
141
|
+
"nixpkgs": [
|
142
|
+
"nixpkgs"
|
143
|
+
],
|
144
|
+
"nixpkgs-stable": "nixpkgs-stable"
|
145
|
+
},
|
146
|
+
"locked": {
|
147
|
+
"lastModified": 1732021966,
|
148
|
+
"owner": "cachix",
|
149
|
+
"repo": "pre-commit-hooks.nix",
|
150
|
+
"rev": "3308484d1a443fc5bc92012435d79e80458fe43c",
|
151
|
+
"type": "github"
|
152
|
+
},
|
153
|
+
"original": {
|
154
|
+
"owner": "cachix",
|
155
|
+
"repo": "pre-commit-hooks.nix",
|
156
|
+
"type": "github"
|
157
|
+
}
|
158
|
+
},
|
159
|
+
"root": {
|
160
|
+
"inputs": {
|
161
|
+
"devenv": "devenv",
|
162
|
+
"nixpkgs": "nixpkgs",
|
163
|
+
"nixpkgs-ruby": "nixpkgs-ruby",
|
164
|
+
"pre-commit-hooks": "pre-commit-hooks"
|
165
|
+
}
|
166
|
+
},
|
167
|
+
"systems": {
|
168
|
+
"locked": {
|
169
|
+
"lastModified": 1681028828,
|
170
|
+
"owner": "nix-systems",
|
171
|
+
"repo": "default",
|
172
|
+
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
173
|
+
"type": "github"
|
174
|
+
},
|
175
|
+
"original": {
|
176
|
+
"owner": "nix-systems",
|
177
|
+
"repo": "default",
|
178
|
+
"type": "github"
|
179
|
+
}
|
180
|
+
}
|
181
|
+
},
|
182
|
+
"root": "root",
|
183
|
+
"version": 7
|
184
|
+
}
|
data/devenv.nix
ADDED
data/devenv.yaml
ADDED
data/email_reply_trimmer.gemspec
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require_relative "lib/email_reply_trimmer"
|
2
3
|
|
3
4
|
Gem::Specification.new do |s|
|
@@ -9,12 +10,17 @@ Gem::Specification.new do |s|
|
|
9
10
|
s.description = "EmailReplyTrimmer is a small library to trim replies from plain text email."
|
10
11
|
|
11
12
|
s.authors = ["Régis Hanol"]
|
12
|
-
s.email = ["
|
13
|
+
s.email = ["rubygems@hanol.fr"]
|
13
14
|
|
14
15
|
s.homepage = "https://github.com/discourse/email_reply_trimmer"
|
15
16
|
s.license = "MIT"
|
16
17
|
|
17
18
|
s.require_paths = ["lib"]
|
18
|
-
s.files = Dir["**/*"].reject { |path| File.directory?(path) }
|
19
|
+
s.files = Dir["**/*"].reject { |path| File.directory?(path) || path =~ /.*\.gem$/ }
|
19
20
|
s.test_files = s.files.select { |path| path =~ /^test\/.+_test\.rb$/ }
|
21
|
+
|
22
|
+
s.add_development_dependency 'rake', '~> 12'
|
23
|
+
s.add_development_dependency 'minitest', '~> 5'
|
24
|
+
s.add_development_dependency 'rubocop'
|
25
|
+
s.add_development_dependency 'rubocop-discourse'
|
20
26
|
end
|
@@ -1,7 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
class DelimiterMatcher
|
2
3
|
|
3
|
-
DELIMITER_CHARACTERS
|
4
|
-
DELIMITER_REGEX
|
4
|
+
DELIMITER_CHARACTERS = "-_,=+~#*ᐧ—"
|
5
|
+
DELIMITER_REGEX = /^[[:blank:]]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:blank:]]*$/
|
5
6
|
|
6
7
|
def self.match?(line)
|
7
8
|
line =~ DELIMITER_REGEX
|
@@ -1,12 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
class EmailHeaderMatcher
|
2
3
|
|
3
4
|
EMAIL_HEADERS_WITH_DATE_MARKERS = [
|
4
|
-
#
|
5
|
+
# Norwegian
|
5
6
|
["Sendt"],
|
6
7
|
# English
|
7
|
-
["Sent"],
|
8
|
+
["Sent", "Date"],
|
8
9
|
# French
|
9
|
-
["Date"],
|
10
|
+
["Date", "Le"],
|
10
11
|
# German
|
11
12
|
["Gesendet"],
|
12
13
|
# Portuguese
|
@@ -17,19 +18,25 @@ class EmailHeaderMatcher
|
|
17
18
|
["Fecha"],
|
18
19
|
# Italian
|
19
20
|
["Data"],
|
21
|
+
# Dutch
|
22
|
+
["Datum"],
|
23
|
+
# Swedish
|
24
|
+
["Skickat"],
|
25
|
+
# Chinese
|
26
|
+
["发送时间"],
|
20
27
|
]
|
21
28
|
|
22
29
|
EMAIL_HEADERS_WITH_DATE_REGEXES = EMAIL_HEADERS_WITH_DATE_MARKERS.map do |header|
|
23
|
-
/^[[:blank:]
|
30
|
+
/^[[:blank:]*]*(?:#{header.join("|")})[[:blank:]*]*:.*\d+/
|
24
31
|
end
|
25
32
|
|
26
33
|
EMAIL_HEADERS_WITH_TEXT_MARKERS = [
|
27
|
-
#
|
34
|
+
# Norwegian
|
28
35
|
["Fra", "Til", "Emne"],
|
29
36
|
# English
|
30
37
|
["From", "To", "Cc", "Reply-To", "Subject"],
|
31
38
|
# French
|
32
|
-
["De", "À", "Répondre à", "Objet"],
|
39
|
+
["De", "Expéditeur", "À", "Destinataire", "Répondre à", "Objet"],
|
33
40
|
# German
|
34
41
|
["Von", "An", "Betreff"],
|
35
42
|
# Portuguese
|
@@ -37,11 +44,17 @@ class EmailHeaderMatcher
|
|
37
44
|
# Spanish
|
38
45
|
["De", "Para", "Asunto"],
|
39
46
|
# Italian
|
40
|
-
["Da", "Risposta", "A", "Oggetto"]
|
47
|
+
["Da", "Risposta", "A", "Oggetto"],
|
48
|
+
# Dutch
|
49
|
+
["Van", "Beantwoorden - Aan", "Aan", "Onderwerp"],
|
50
|
+
# Swedish
|
51
|
+
["Från", "Till", "Ämne"],
|
52
|
+
# Chinese
|
53
|
+
["发件人", "收件人", "主题"],
|
41
54
|
]
|
42
55
|
|
43
56
|
EMAIL_HEADERS_WITH_TEXT_REGEXES = EMAIL_HEADERS_WITH_TEXT_MARKERS.map do |header|
|
44
|
-
/^[[:blank:]
|
57
|
+
/^[[:blank:]*]*(?:#{header.join("|")})[[:blank:]*]*:.*[[:word:]]+/i
|
45
58
|
end
|
46
59
|
|
47
60
|
EMAIL_HEADER_REGEXES = [
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
class EmbeddedEmailMatcher
|
2
3
|
|
3
4
|
# On Wed, Sep 25, 2013, at 03:57 PM, jorge_castro wrote:
|
@@ -10,38 +11,43 @@ class EmbeddedEmailMatcher
|
|
10
11
|
# Dnia 14 lip 2015 o godz. 00:25 Michael Downey <info@discourse.org> napisał(a):
|
11
12
|
# Em seg, 27 de jul de 2015 17:13, Neil Lalonde <info@discourse.org> escreveu:
|
12
13
|
# El jueves, 21 de noviembre de 2013, codinghorror escribió:
|
13
|
-
#
|
14
|
-
|
14
|
+
# At 6/16/2016 08:32 PM, you wrote:
|
15
|
+
ON_DATE_SOMEONE_WROTE_REGEXES = [
|
16
|
+
# Chinese
|
17
|
+
/^[[:blank:]<>-]*在 (?:(?!\b(?>在|写道)\b).)+?写道[[:blank:].:>-]*$/im,
|
15
18
|
# Dutch
|
16
|
-
[
|
19
|
+
/^[[:blank:]<>-]*Op (?:(?!\b(?>Op|het\svolgende\sgeschreven|schreef)\b).)+?(het\svolgende\sgeschreven|schreef[^:]+)[[:blank:].:>-]*$/im,
|
17
20
|
# English
|
18
|
-
[
|
21
|
+
/^[[:blank:]<>-]*In message (?:(?!\b(?>In message|writes)\b).)+?writes[[:blank:].:>-]*$/im,
|
22
|
+
/^[[:blank:]<>-]*(On|At) (?:(?!\b(?>On|wrote|writes|says|said)\b).)+?(wrote|writes|says|said)[[:blank:].:>-]*$/im,
|
19
23
|
# French
|
20
|
-
[
|
24
|
+
/^[[:blank:]<>-]*Le (?:(?!\b(?>Le|nous\sa\sdit|a\s+écrit)\b).)+?(nous\sa\sdit|a\s+écrit)[[:blank:].:>-]*$/im,
|
25
|
+
# German
|
26
|
+
/^[[:blank:]<>-]*Am (?:(?!\b(?>Am|schrieben\sSie)\b).)+?schrieben\sSie[[:blank:].:>-]*$/im,
|
27
|
+
/^[[:blank:]<>-]*Am (?:(?!\b(?>Am|geschrieben)\b).)+?(geschrieben|schrieb[^:]+)[[:blank:].:>-]*$/im,
|
21
28
|
# Italian
|
22
|
-
[
|
29
|
+
/^[[:blank:]<>-]*Il (?:(?!\b(?>Il|ha\sscritto)\b).)+?ha\sscritto[[:blank:].:>-]*$/im,
|
23
30
|
# Polish
|
24
|
-
[
|
31
|
+
/^[[:blank:]<>-]*(Dnia|Dňa) (?:(?!\b(?>Dnia|Dňa|napisał)\b).)+?napisał(\(a\))?[[:blank:].:>-]*$/im,
|
25
32
|
# Portuguese
|
26
|
-
[
|
33
|
+
/^[[:blank:]<>-]*Em (?:(?!\b(?>Em|escreveu)\b).)+?escreveu[[:blank:].:>-]*$/im,
|
27
34
|
# Spanish
|
28
|
-
[
|
29
|
-
# German
|
30
|
-
["Am", "schrieb"],
|
35
|
+
/^[[:blank:]<>-]*El (?:(?!\b(?>El|escribió)\b).)+?escribió[[:blank:].:>-]*$/im,
|
31
36
|
]
|
32
37
|
|
33
|
-
ON_DATE_SOMEONE_WROTE_REGEXES = ON_DATE_SOMEONE_WROTE_MARKERS.map do |on, wrote|
|
34
|
-
wrote.gsub!(/ +/, "[[:space:]]+") # the "wrote" part might span over multiple lines
|
35
|
-
/^([[:blank:]>\-]*#{on}\s(?:(?!#{on}\s|#{wrote}:?)[\s\S])*#{wrote}:?[[:blank:]\-]*)$/m
|
36
|
-
end
|
37
|
-
|
38
38
|
# Op 10 dec. 2015 18:35 schreef "Arpit Jalan" <info@discourse.org>:
|
39
39
|
# Am 18.09.2013 um 16:24 schrieb codinghorror <info@discourse.org>:
|
40
|
+
# Den 15. jun. 2016 kl. 20.42 skrev Jeff Atwood <info@discourse.org>:
|
41
|
+
# søn. 30. apr. 2017 kl. 00.26 skrev David Taylor <meta@discoursemail.com>:
|
40
42
|
ON_DATE_WROTE_SOMEONE_MARKERS = [
|
41
43
|
# Dutch
|
42
44
|
["Op", "schreef"],
|
43
45
|
# German
|
44
46
|
["Am", "schrieb"],
|
47
|
+
# Norwegian
|
48
|
+
["Den", "skrev"],
|
49
|
+
# Norwegian (abbreviated weekday prefix, e.g. "søn." for "søndag")
|
50
|
+
["søn\.", "skrev"],
|
45
51
|
]
|
46
52
|
|
47
53
|
ON_DATE_WROTE_SOMEONE_REGEXES = ON_DATE_WROTE_SOMEONE_MARKERS.map do |on, wrote|
|
@@ -49,15 +55,31 @@ class EmbeddedEmailMatcher
|
|
49
55
|
end
|
50
56
|
|
51
57
|
# суббота, 14 марта 2015 г. пользователь etewiah написал:
|
58
|
+
# 23 mar 2017 21:25 "Neil Lalonde" <meta@discoursemail.com> napisał(a):
|
59
|
+
# 30 серп. 2016 р. 20:45 "Arpit" no-reply@example.com пише:
|
52
60
|
DATE_SOMEONE_WROTE_MARKERS = [
|
53
61
|
# Russian
|
54
62
|
["пользователь", "написал"],
|
63
|
+
# Polish
|
64
|
+
["", "napisał\\(a\\)"],
|
65
|
+
# Ukrainian
|
66
|
+
["", "пише"],
|
55
67
|
]
|
56
68
|
|
57
69
|
DATE_SOMEONE_WROTE_REGEXES = DATE_SOMEONE_WROTE_MARKERS.map do |user, wrote|
|
58
|
-
|
70
|
+
user.size == 0 ?
|
71
|
+
/\d{4}.{1,80}\n?.{0,80}?#{wrote}:/ :
|
72
|
+
/\d{4}.{1,80}#{user}.{0,80}\n?.{0,80}?#{wrote}:/
|
59
73
|
end
|
60
74
|
|
75
|
+
# Max Mustermann <try_discourse@discoursemail.com> schrieb am Fr., 28. Apr. 2017 um 11:53 Uhr:
|
76
|
+
SOMEONE_WROTE_ON_DATE_REGEXES = [
|
77
|
+
# English
|
78
|
+
/^.+\bwrote\b[[:space:]]+\bon\b.+[^:]+:/,
|
79
|
+
# German
|
80
|
+
/^.+\bschrieb\b[[:space:]]+\bam\b.+[^:]+:/,
|
81
|
+
]
|
82
|
+
|
61
83
|
# 2016-03-03 17:21 GMT+01:00 Some One
|
62
84
|
ISO_DATE_SOMEONE_REGEX = /^[[:blank:]>]*20\d\d-\d\d-\d\d \d\d:\d\d GMT\+\d\d:\d\d [\w[:blank:]]+$/
|
63
85
|
|
@@ -65,7 +87,7 @@ class EmbeddedEmailMatcher
|
|
65
87
|
# 2013/10/2 camilohollanda <info@discourse.org>
|
66
88
|
# вт, 5 янв. 2016 г. в 23:39, Erlend Sogge Heggen <info@discourse.org>:
|
67
89
|
# ср, 1 апр. 2015, 18:29, Denis Didkovsky <info@discourse.org>:
|
68
|
-
DATE_SOMEONE_EMAIL_REGEX =
|
90
|
+
DATE_SOMEONE_EMAIL_REGEX = /\d{4}.{1,80}\s?<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$/
|
69
91
|
|
70
92
|
# codinghorror via Discourse Meta wrote:
|
71
93
|
# codinghorror via Discourse Meta <info@discourse.org> schrieb:
|
@@ -77,11 +99,12 @@ class EmbeddedEmailMatcher
|
|
77
99
|
]
|
78
100
|
|
79
101
|
SOMEONE_VIA_SOMETHING_WROTE_REGEXES = SOMEONE_VIA_SOMETHING_WROTE_MARKERS.map do |wrote|
|
80
|
-
|
102
|
+
/^.+ via .+ #{wrote}:?[[:blank:]]*$/
|
81
103
|
end
|
82
104
|
|
83
105
|
# Some One <info@discourse.org> wrote:
|
84
|
-
|
106
|
+
# Gavin Sinclair (gsinclair@soyabean.com.au) wrote:
|
107
|
+
SOMEONE_EMAIL_WROTE_REGEX = /^.+\b[\w.+-]+@[\w.-]+\.\w{2,}\b.+wrote:?$/
|
85
108
|
|
86
109
|
# Posted by mpalmer on 01/21/2016
|
87
110
|
POSTED_BY_SOMEONE_ON_DATE_REGEX = /^[[:blank:]>]*Posted by .+ on \d{2}\/\d{2}\/\d{4}$/i
|
@@ -92,17 +115,21 @@ class EmbeddedEmailMatcher
|
|
92
115
|
# ----- Original Message -----
|
93
116
|
# -----Original Message-----
|
94
117
|
# *----- Original Message -----*
|
118
|
+
# ----- Reply message -----
|
119
|
+
# ------------------ 原始邮件 ------------------
|
95
120
|
FORWARDED_EMAIL_REGEXES = [
|
96
121
|
# English
|
97
122
|
/^[[:blank:]>]*Begin forwarded message:/i,
|
98
|
-
/^[[:blank:]
|
99
|
-
/^[[:blank:]>\*]*-{2,}[[:blank:]]*(Forwarded|Original) Message[[:blank:]]*-{2,}/i,
|
123
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*(Forwarded|Original|Reply) Message[[:blank:]]*-{2,}/i,
|
100
124
|
# French
|
101
|
-
/^[[:blank:]
|
125
|
+
/^[[:blank:]>]*Début du message transféré :/i,
|
126
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}/i,
|
102
127
|
# German
|
103
|
-
/^[[:blank:]
|
128
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}/i,
|
104
129
|
# Spanish
|
105
|
-
/^[[:blank:]
|
130
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}/i,
|
131
|
+
# Chinese
|
132
|
+
/^[[:blank:]>*]*-{2,}[[:blank:]]*原始邮件[[:blank:]]*-{2,}/i,
|
106
133
|
]
|
107
134
|
|
108
135
|
EMBEDDED_REGEXES = [
|
@@ -110,6 +137,7 @@ class EmbeddedEmailMatcher
|
|
110
137
|
ON_DATE_WROTE_SOMEONE_REGEXES,
|
111
138
|
DATE_SOMEONE_WROTE_REGEXES,
|
112
139
|
DATE_SOMEONE_EMAIL_REGEX,
|
140
|
+
SOMEONE_WROTE_ON_DATE_REGEXES,
|
113
141
|
ISO_DATE_SOMEONE_REGEX,
|
114
142
|
SOMEONE_VIA_SOMETHING_WROTE_REGEXES,
|
115
143
|
SOMEONE_EMAIL_WROTE_REGEX,
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
class SignatureMatcher
|
2
3
|
|
3
4
|
# Envoyé depuis mon iPhone
|
@@ -15,22 +16,36 @@ class SignatureMatcher
|
|
15
16
|
# 從我的 iPhone 傳送
|
16
17
|
SIGNATURE_REGEXES = [
|
17
18
|
# Chinese
|
18
|
-
/^[[:blank:]
|
19
|
+
/^[[:blank:]]*從我的 iPhone 傳送/i,
|
19
20
|
# English
|
20
|
-
/^[[:blank:]
|
21
|
-
/^[[:blank:]
|
22
|
-
/^[[:blank:]
|
21
|
+
/^[[:blank:]]*[[:word:]]+ from mobile/i,
|
22
|
+
/^[[:blank:]]*[\(<]*Sent (from|via|with|by) .+[\)>]*/i,
|
23
|
+
/^[[:blank:]]*From my .{1,20}/i,
|
24
|
+
/^[[:blank:]]*Get Outlook for /i,
|
23
25
|
# French
|
24
|
-
/^[[:blank:]
|
26
|
+
/^[[:blank:]]*Envoyé depuis (mon|Yahoo Mail)/i,
|
25
27
|
# German
|
26
|
-
/^[[:blank:]
|
27
|
-
/^[[:blank:]
|
28
|
+
/^[[:blank:]]*Von meinem .+ gesendet/i,
|
29
|
+
/^[[:blank:]]*Diese Nachricht wurde von .+ gesendet/i,
|
30
|
+
# Italian
|
31
|
+
/^[[:blank:]]*Inviato da /i,
|
32
|
+
# Norwegian
|
33
|
+
/^[[:blank:]]*Sendt fra min /i,
|
34
|
+
# Portuguese
|
35
|
+
/^[[:blank:]]*Enviado do meu /i,
|
28
36
|
# Spanish
|
29
|
-
/^[[:blank:]
|
37
|
+
/^[[:blank:]]*Enviado desde mi /i,
|
38
|
+
# Dutch
|
39
|
+
/^[[:blank:]]*Verzonden met /i,
|
40
|
+
/^[[:blank:]]*Verstuurd vanaf mijn /i,
|
41
|
+
# Swedish
|
42
|
+
/^[[:blank:]]*från min /i,
|
30
43
|
]
|
31
44
|
|
32
45
|
def self.match?(line)
|
33
|
-
|
46
|
+
# remove any markdown links
|
47
|
+
stripped = line.gsub(/\[([^\]]+)\]\([^\)]+\)/) { $1 }
|
48
|
+
SIGNATURE_REGEXES.any? { |r| stripped =~ r }
|
34
49
|
end
|
35
50
|
|
36
51
|
end
|