email_reply_trimmer 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. checksums.yaml +5 -5
  2. data/Gemfile +5 -0
  3. data/Gemfile.lock +98 -0
  4. data/README.md +4 -4
  5. data/Rakefile +4 -7
  6. data/devenv.lock +184 -0
  7. data/devenv.nix +4 -0
  8. data/devenv.yaml +8 -0
  9. data/email_reply_trimmer.gemspec +8 -2
  10. data/lib/email_reply_trimmer/delimiter_matcher.rb +3 -2
  11. data/lib/email_reply_trimmer/email_header_matcher.rb +21 -8
  12. data/lib/email_reply_trimmer/embedded_email_matcher.rb +53 -25
  13. data/lib/email_reply_trimmer/empty_line_matcher.rb +1 -0
  14. data/lib/email_reply_trimmer/quote_matcher.rb +1 -0
  15. data/lib/email_reply_trimmer/signature_matcher.rb +24 -9
  16. data/lib/email_reply_trimmer.rb +102 -37
  17. data/test/before/forwarded_apple.txt +1 -0
  18. data/test/before/forwarded_gmail.txt +1 -0
  19. data/test/elided/block_code_spacers.txt +0 -0
  20. data/test/elided/email_headers_5.txt +23 -0
  21. data/test/elided/embedded_ception.txt +3 -3
  22. data/test/elided/embedded_email_12.txt +2 -2
  23. data/test/elided/embedded_email_13.txt +9 -0
  24. data/test/elided/embedded_email_14.txt +11 -0
  25. data/test/elided/embedded_email_15.txt +4 -0
  26. data/test/elided/embedded_email_16.txt +4 -0
  27. data/test/elided/embedded_email_17.txt +2 -0
  28. data/test/elided/embedded_email_18.txt +1 -0
  29. data/test/elided/embedded_email_19.txt +0 -0
  30. data/test/elided/embedded_email_chinese.txt +4 -0
  31. data/test/elided/embedded_email_german_4.txt +15 -0
  32. data/test/elided/embedded_email_german_5.txt +20 -0
  33. data/test/elided/embedded_email_german_6.txt +8 -0
  34. data/test/elided/embedded_email_norwegian.txt +9 -0
  35. data/test/elided/embedded_email_polish_2.txt +7 -0
  36. data/test/elided/embedded_email_quote_text.txt +5 -0
  37. data/test/elided/embedded_email_russian_2.txt +23 -0
  38. data/test/elided/embedded_email_swedish.txt +8 -0
  39. data/test/elided/embedded_email_ukrainian.txt +17 -0
  40. data/test/elided/forwarded_apple.txt +15 -0
  41. data/test/elided/forwarded_gmail.txt +15 -0
  42. data/test/elided/signatures.txt +5 -0
  43. data/test/elided/spam_1.txt +75 -0
  44. data/test/elided/spam_2.txt +152 -0
  45. data/test/emails/block_code_spacers.txt +13 -0
  46. data/test/emails/email_headers_5.txt +37 -0
  47. data/test/emails/embedded_email_1.txt +1 -1
  48. data/test/emails/embedded_email_13.txt +14 -0
  49. data/test/emails/embedded_email_14.txt +16 -0
  50. data/test/emails/embedded_email_15.txt +9 -0
  51. data/test/emails/embedded_email_16.txt +16 -0
  52. data/test/emails/embedded_email_17.txt +38 -0
  53. data/test/emails/embedded_email_18.txt +7 -0
  54. data/test/emails/embedded_email_19.txt +13 -0
  55. data/test/emails/embedded_email_4.txt +13 -13
  56. data/test/emails/embedded_email_7.txt +4 -4
  57. data/test/emails/embedded_email_chinese.txt +7 -0
  58. data/test/emails/embedded_email_german_4.txt +18 -0
  59. data/test/emails/embedded_email_german_5.txt +23 -0
  60. data/test/emails/embedded_email_german_6.txt +14 -0
  61. data/test/emails/embedded_email_norwegian.txt +11 -0
  62. data/test/emails/embedded_email_polish_2.txt +11 -0
  63. data/test/emails/embedded_email_russian_2.txt +26 -0
  64. data/test/emails/embedded_email_swedish.txt +20 -0
  65. data/test/emails/embedded_email_ukrainian.txt +19 -0
  66. data/test/emails/forwarded_apple.txt +17 -0
  67. data/test/emails/forwarded_gmail.txt +17 -0
  68. data/test/emails/signatures.txt +5 -0
  69. data/test/emails/spam_1.txt +75 -0
  70. data/test/emails/spam_2.txt +174 -0
  71. data/test/embedded/forwarded_apple.txt +13 -0
  72. data/test/embedded/forwarded_gmail.txt +14 -0
  73. data/test/matchers/does_not_contain_embedded_email.txt +5 -0
  74. data/test/test_email_matcher.rb +16 -0
  75. data/test/test_email_reply_trimmer.rb +8 -3
  76. data/test/trimmed/block_code_spacers.txt +13 -0
  77. data/test/trimmed/email_headers_5.txt +11 -0
  78. data/test/trimmed/embedded_email_13.txt +3 -0
  79. data/test/trimmed/embedded_email_14.txt +3 -0
  80. data/test/trimmed/embedded_email_15.txt +3 -0
  81. data/test/trimmed/embedded_email_16.txt +11 -0
  82. data/test/trimmed/embedded_email_17.txt +35 -0
  83. data/test/trimmed/embedded_email_18.txt +5 -0
  84. data/test/trimmed/embedded_email_19.txt +13 -0
  85. data/test/trimmed/embedded_email_chinese.txt +2 -0
  86. data/test/trimmed/embedded_email_german_4.txt +1 -0
  87. data/test/trimmed/embedded_email_german_5.txt +1 -0
  88. data/test/trimmed/embedded_email_german_6.txt +4 -0
  89. data/test/trimmed/embedded_email_norwegian.txt +1 -0
  90. data/test/trimmed/embedded_email_polish_2.txt +2 -0
  91. data/test/trimmed/embedded_email_quote_text.txt +0 -5
  92. data/test/trimmed/embedded_email_russian_2.txt +1 -0
  93. data/test/trimmed/embedded_email_swedish.txt +9 -0
  94. data/test/trimmed/embedded_email_ukrainian.txt +1 -0
  95. data/test/trimmed/forwarded_apple.txt +1 -0
  96. data/test/trimmed/forwarded_gmail.txt +1 -0
  97. data/test/trimmed/spam_1.txt +0 -0
  98. data/test/trimmed/spam_2.txt +21 -0
  99. metadata +147 -15
  100. /data/test/elided/{embedded_email_polish.txt → embedded_email_polish_1.txt} +0 -0
  101. /data/test/elided/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
  102. /data/test/emails/{embedded_email_polish.txt → embedded_email_polish_1.txt} +0 -0
  103. /data/test/emails/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
  104. /data/test/trimmed/{embedded_email_polish.txt → embedded_email_polish_1.txt} +0 -0
  105. /data/test/trimmed/{embedded_email_russian.txt → embedded_email_russian_1.txt} +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c6eabb8ce3f3327f3abe63f3ee2fe147fb161d96
4
- data.tar.gz: 7a07c2267ef47f4607a2ae2f05feac8c0a2584c1
2
+ SHA256:
3
+ metadata.gz: 9914eb793e4bbfa4f291097707acb2af27f491507b0e52b0e1433df3c1fb1f0d
4
+ data.tar.gz: 2c519cd4b6fe2db1dc3b12201c8a5bd540fbf274537610069b73d34bb3e0566f
5
5
  SHA512:
6
- metadata.gz: dba90e1fdc0b0a4f7032f9c2d1e9aabf575a390ee4cf39618037b9a820de8d7abad6338623b203211ef349dc3568b2e55440721dc89fa8ac144eda878cf0e463
7
- data.tar.gz: 19be9d9b0496d31e81f7f7adb296d273aeea84130a8243d669857de751748a4e923584a14d4c8f0a90f338428e7dd219fa4212fddce38575b44ec58a598c5455
6
+ metadata.gz: b0687f10183b5472c816d433c3f654dd085b8d615ef223c54e97e61e893c47752ed20eb5b1354c7d184ab84075de3c3fa492c76191b1fc0cfdcfc48a1d661b34
7
+ data.tar.gz: 3d740b872f6116bd0c59d2d1b19e405191f1c25c619e5564a602eebed3691323761292d7a063f7f224bacd668f2ede622eac085f7a5b7bad3b90570ac3b018b9
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ source 'https://rubygems.org'
3
+
4
+ # Specify your gem's dependencies in email_reply_trimmer.gemspec
5
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,98 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ email_reply_trimmer (0.2.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ activesupport (8.0.0)
10
+ base64
11
+ benchmark (>= 0.3)
12
+ bigdecimal
13
+ concurrent-ruby (~> 1.0, >= 1.3.1)
14
+ connection_pool (>= 2.2.5)
15
+ drb
16
+ i18n (>= 1.6, < 2)
17
+ logger (>= 1.4.2)
18
+ minitest (>= 5.1)
19
+ securerandom (>= 0.3)
20
+ tzinfo (~> 2.0, >= 2.0.5)
21
+ uri (>= 0.13.1)
22
+ ast (2.4.2)
23
+ base64 (0.2.0)
24
+ benchmark (0.4.0)
25
+ bigdecimal (3.1.8)
26
+ concurrent-ruby (1.3.4)
27
+ connection_pool (2.4.1)
28
+ drb (2.2.1)
29
+ i18n (1.14.6)
30
+ concurrent-ruby (~> 1.0)
31
+ json (2.8.2)
32
+ language_server-protocol (3.17.0.3)
33
+ logger (1.6.1)
34
+ minitest (5.25.2)
35
+ parallel (1.26.3)
36
+ parser (3.3.6.0)
37
+ ast (~> 2.4.1)
38
+ racc
39
+ racc (1.8.1)
40
+ rack (3.1.8)
41
+ rainbow (3.1.1)
42
+ rake (12.3.3)
43
+ regexp_parser (2.9.2)
44
+ rubocop (1.69.0)
45
+ json (~> 2.3)
46
+ language_server-protocol (>= 3.17.0)
47
+ parallel (~> 1.10)
48
+ parser (>= 3.3.0.2)
49
+ rainbow (>= 2.2.2, < 4.0)
50
+ regexp_parser (>= 2.4, < 3.0)
51
+ rubocop-ast (>= 1.36.1, < 2.0)
52
+ ruby-progressbar (~> 1.7)
53
+ unicode-display_width (>= 2.4.0, < 4.0)
54
+ rubocop-ast (1.36.2)
55
+ parser (>= 3.3.1.0)
56
+ rubocop-capybara (2.21.0)
57
+ rubocop (~> 1.41)
58
+ rubocop-discourse (3.8.6)
59
+ activesupport (>= 6.1)
60
+ rubocop (>= 1.59.0)
61
+ rubocop-capybara (>= 2.0.0)
62
+ rubocop-factory_bot (>= 2.0.0)
63
+ rubocop-rails (>= 2.25.0)
64
+ rubocop-rspec (>= 3.0.1)
65
+ rubocop-rspec_rails (>= 2.30.0)
66
+ rubocop-factory_bot (2.26.1)
67
+ rubocop (~> 1.61)
68
+ rubocop-rails (2.27.0)
69
+ activesupport (>= 4.2.0)
70
+ rack (>= 1.1)
71
+ rubocop (>= 1.52.0, < 2.0)
72
+ rubocop-ast (>= 1.31.1, < 2.0)
73
+ rubocop-rspec (3.2.0)
74
+ rubocop (~> 1.61)
75
+ rubocop-rspec_rails (2.30.0)
76
+ rubocop (~> 1.61)
77
+ rubocop-rspec (~> 3, >= 3.0.1)
78
+ ruby-progressbar (1.13.0)
79
+ securerandom (0.3.2)
80
+ tzinfo (2.0.6)
81
+ concurrent-ruby (~> 1.0)
82
+ unicode-display_width (3.1.2)
83
+ unicode-emoji (~> 4.0, >= 4.0.4)
84
+ unicode-emoji (4.0.4)
85
+ uri (1.0.2)
86
+
87
+ PLATFORMS
88
+ ruby
89
+
90
+ DEPENDENCIES
91
+ email_reply_trimmer!
92
+ minitest (~> 5)
93
+ rake (~> 12)
94
+ rubocop
95
+ rubocop-discourse
96
+
97
+ BUNDLED WITH
98
+ 2.5.22
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Discourse Email Reply Trimmer [![Build Status](https://api.travis-ci.org/discourse/email_reply_trimmer.svg?branch=master)](https://travis-ci.org/discourse/email_reply_trimmer)
1
+ # Discourse Email Reply Trimmer
2
2
 
3
3
  EmailReplyTrimmer is a small library to trim replies from plain text email.
4
4
 
@@ -16,6 +16,6 @@ Run `rake` to run the tests.
16
16
 
17
17
  ## Inspirations
18
18
 
19
- - [GitHub's Email Reply Parser](https://github.com/github/email_reply_parser)
20
- - [MailGun's Talon](https://github.com/mailgun/talon)
21
- - [Vitor R. Carvalho's Learning to Extract Signature and Reply Lines from Email](http://www.cs.cmu.edu/~vitor/papers/sigFilePaper_finalversion.pdf)
19
+ - [GitHub's Email Reply Parser](https://github.com/github/email_reply_parser)
20
+ - [MailGun's Talon](https://github.com/mailgun/talon)
21
+ - [Vitor R. Carvalho's Learning to Extract Signature and Reply Lines from Email](http://www.cs.cmu.edu/~vitor/papers/sigFilePaper_finalversion.pdf)
data/Rakefile CHANGED
@@ -1,12 +1,9 @@
1
- def name
2
- @name ||= Dir["*.gemspec"].first.split(".").first
3
- end
1
+ # frozen_string_literal: true
4
2
 
5
- def version
6
- @version ||= File.read("lib/#{name}.rb")[/^\s*VERSION\s*=\s*['"](?'version'\d+\.\d+\.\d+)['"]/, "version"]
7
- end
3
+ name = Dir["*.gemspec"].first.split(".").first
4
+ version = File.read("lib/#{name}.rb")[/^\s*VERSION\s*=\s*['"](?'version'\d+\.\d+\.\d+)['"]/, "version"]
8
5
 
9
- task :default => :test
6
+ task default: :test
10
7
 
11
8
  require "rake/testtask"
12
9
  Rake::TestTask.new(:test)
data/devenv.lock ADDED
@@ -0,0 +1,184 @@
1
+ {
2
+ "nodes": {
3
+ "devenv": {
4
+ "locked": {
5
+ "dir": "src/modules",
6
+ "lastModified": 1732830318,
7
+ "owner": "cachix",
8
+ "repo": "devenv",
9
+ "rev": "51abcb75d471a215c800937d4e30dc765d305c6d",
10
+ "type": "github"
11
+ },
12
+ "original": {
13
+ "dir": "src/modules",
14
+ "owner": "cachix",
15
+ "repo": "devenv",
16
+ "type": "github"
17
+ }
18
+ },
19
+ "flake-compat": {
20
+ "flake": false,
21
+ "locked": {
22
+ "lastModified": 1732722421,
23
+ "owner": "edolstra",
24
+ "repo": "flake-compat",
25
+ "rev": "9ed2ac151eada2306ca8c418ebd97807bb08f6ac",
26
+ "type": "github"
27
+ },
28
+ "original": {
29
+ "owner": "edolstra",
30
+ "repo": "flake-compat",
31
+ "type": "github"
32
+ }
33
+ },
34
+ "flake-compat_2": {
35
+ "flake": false,
36
+ "locked": {
37
+ "lastModified": 1732722421,
38
+ "owner": "edolstra",
39
+ "repo": "flake-compat",
40
+ "rev": "9ed2ac151eada2306ca8c418ebd97807bb08f6ac",
41
+ "type": "github"
42
+ },
43
+ "original": {
44
+ "owner": "edolstra",
45
+ "repo": "flake-compat",
46
+ "type": "github"
47
+ }
48
+ },
49
+ "flake-utils": {
50
+ "inputs": {
51
+ "systems": "systems"
52
+ },
53
+ "locked": {
54
+ "lastModified": 1731533236,
55
+ "owner": "numtide",
56
+ "repo": "flake-utils",
57
+ "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
58
+ "type": "github"
59
+ },
60
+ "original": {
61
+ "owner": "numtide",
62
+ "repo": "flake-utils",
63
+ "type": "github"
64
+ }
65
+ },
66
+ "gitignore": {
67
+ "inputs": {
68
+ "nixpkgs": [
69
+ "pre-commit-hooks",
70
+ "nixpkgs"
71
+ ]
72
+ },
73
+ "locked": {
74
+ "lastModified": 1709087332,
75
+ "owner": "hercules-ci",
76
+ "repo": "gitignore.nix",
77
+ "rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
78
+ "type": "github"
79
+ },
80
+ "original": {
81
+ "owner": "hercules-ci",
82
+ "repo": "gitignore.nix",
83
+ "type": "github"
84
+ }
85
+ },
86
+ "nixpkgs": {
87
+ "locked": {
88
+ "lastModified": 1732617236,
89
+ "owner": "nixos",
90
+ "repo": "nixpkgs",
91
+ "rev": "af51545ec9a44eadf3fe3547610a5cdd882bc34e",
92
+ "type": "github"
93
+ },
94
+ "original": {
95
+ "owner": "nixos",
96
+ "ref": "nixpkgs-unstable",
97
+ "repo": "nixpkgs",
98
+ "type": "github"
99
+ }
100
+ },
101
+ "nixpkgs-ruby": {
102
+ "inputs": {
103
+ "flake-compat": "flake-compat",
104
+ "flake-utils": "flake-utils",
105
+ "nixpkgs": [
106
+ "nixpkgs"
107
+ ]
108
+ },
109
+ "locked": {
110
+ "lastModified": 1730958464,
111
+ "owner": "bobvanderlinden",
112
+ "repo": "nixpkgs-ruby",
113
+ "rev": "93bd040be2856ba0e44a33db6360e8c9c0c09aa1",
114
+ "type": "github"
115
+ },
116
+ "original": {
117
+ "owner": "bobvanderlinden",
118
+ "repo": "nixpkgs-ruby",
119
+ "type": "github"
120
+ }
121
+ },
122
+ "nixpkgs-stable": {
123
+ "locked": {
124
+ "lastModified": 1732749044,
125
+ "owner": "NixOS",
126
+ "repo": "nixpkgs",
127
+ "rev": "0c5b4ecbed5b155b705336aa96d878e55acd8685",
128
+ "type": "github"
129
+ },
130
+ "original": {
131
+ "owner": "NixOS",
132
+ "ref": "nixos-24.05",
133
+ "repo": "nixpkgs",
134
+ "type": "github"
135
+ }
136
+ },
137
+ "pre-commit-hooks": {
138
+ "inputs": {
139
+ "flake-compat": "flake-compat_2",
140
+ "gitignore": "gitignore",
141
+ "nixpkgs": [
142
+ "nixpkgs"
143
+ ],
144
+ "nixpkgs-stable": "nixpkgs-stable"
145
+ },
146
+ "locked": {
147
+ "lastModified": 1732021966,
148
+ "owner": "cachix",
149
+ "repo": "pre-commit-hooks.nix",
150
+ "rev": "3308484d1a443fc5bc92012435d79e80458fe43c",
151
+ "type": "github"
152
+ },
153
+ "original": {
154
+ "owner": "cachix",
155
+ "repo": "pre-commit-hooks.nix",
156
+ "type": "github"
157
+ }
158
+ },
159
+ "root": {
160
+ "inputs": {
161
+ "devenv": "devenv",
162
+ "nixpkgs": "nixpkgs",
163
+ "nixpkgs-ruby": "nixpkgs-ruby",
164
+ "pre-commit-hooks": "pre-commit-hooks"
165
+ }
166
+ },
167
+ "systems": {
168
+ "locked": {
169
+ "lastModified": 1681028828,
170
+ "owner": "nix-systems",
171
+ "repo": "default",
172
+ "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
173
+ "type": "github"
174
+ },
175
+ "original": {
176
+ "owner": "nix-systems",
177
+ "repo": "default",
178
+ "type": "github"
179
+ }
180
+ }
181
+ },
182
+ "root": "root",
183
+ "version": 7
184
+ }
data/devenv.nix ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ languages.ruby.enable = true;
3
+ languages.ruby.version = "3.2";
4
+ }
data/devenv.yaml ADDED
@@ -0,0 +1,8 @@
1
+ inputs:
2
+ nixpkgs-ruby:
3
+ url: github:bobvanderlinden/nixpkgs-ruby
4
+ inputs:
5
+ nixpkgs:
6
+ follows: nixpkgs
7
+ nixpkgs:
8
+ url: github:nixos/nixpkgs/nixpkgs-unstable
data/email_reply_trimmer.gemspec CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require_relative "lib/email_reply_trimmer"
2
3
 
3
4
  Gem::Specification.new do |s|
@@ -9,12 +10,17 @@ Gem::Specification.new do |s|
9
10
  s.description = "EmailReplyTrimmer is a small library to trim replies from plain text email."
10
11
 
11
12
  s.authors = ["Régis Hanol"]
12
- s.email = ["regis+rubygems@hanol.fr"]
13
+ s.email = ["rubygems@hanol.fr"]
13
14
 
14
15
  s.homepage = "https://github.com/discourse/email_reply_trimmer"
15
16
  s.license = "MIT"
16
17
 
17
18
  s.require_paths = ["lib"]
18
- s.files = Dir["**/*"].reject { |path| File.directory?(path) }
19
+ s.files = Dir["**/*"].reject { |path| File.directory?(path) || path =~ /.*\.gem$/ }
19
20
  s.test_files = s.files.select { |path| path =~ /^test\/.+_test\.rb$/ }
21
+
22
+ s.add_development_dependency 'rake', '~> 12'
23
+ s.add_development_dependency 'minitest', '~> 5'
24
+ s.add_development_dependency 'rubocop'
25
+ s.add_development_dependency 'rubocop-discourse'
20
26
  end
data/lib/email_reply_trimmer/delimiter_matcher.rb CHANGED
@@ -1,7 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  class DelimiterMatcher
2
3
 
3
- DELIMITER_CHARACTERS ||= "-_,=+~#*ᐧ"
4
- DELIMITER_REGEX ||= /^[[:blank:]]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:blank:]]*$/
4
+ DELIMITER_CHARACTERS = "-_,=+~#*ᐧ—"
5
+ DELIMITER_REGEX = /^[[:blank:]]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:blank:]]*$/
5
6
 
6
7
  def self.match?(line)
7
8
  line =~ DELIMITER_REGEX
data/lib/email_reply_trimmer/email_header_matcher.rb CHANGED
@@ -1,12 +1,13 @@
1
+ # frozen_string_literal: true
1
2
  class EmailHeaderMatcher
2
3
 
3
4
  EMAIL_HEADERS_WITH_DATE_MARKERS = [
4
- # Dutch
5
+ # Norwegian
5
6
  ["Sendt"],
6
7
  # English
7
- ["Sent"],
8
+ ["Sent", "Date"],
8
9
  # French
9
- ["Date"],
10
+ ["Date", "Le"],
10
11
  # German
11
12
  ["Gesendet"],
12
13
  # Portuguese
@@ -17,19 +18,25 @@ class EmailHeaderMatcher
17
18
  ["Fecha"],
18
19
  # Italian
19
20
  ["Data"],
21
+ # Dutch
22
+ ["Datum"],
23
+ # Swedish
24
+ ["Skickat"],
25
+ # Chinese
26
+ ["发送时间"],
20
27
  ]
21
28
 
22
29
  EMAIL_HEADERS_WITH_DATE_REGEXES = EMAIL_HEADERS_WITH_DATE_MARKERS.map do |header|
23
- /^[[:blank:]>\*]*(?:#{header.join("|")})[[:blank:]\*]*:.*\d+/
30
+ /^[[:blank:]*]*(?:#{header.join("|")})[[:blank:]*]*:.*\d+/
24
31
  end
25
32
 
26
33
  EMAIL_HEADERS_WITH_TEXT_MARKERS = [
27
- # Dutch
34
+ # Norwegian
28
35
  ["Fra", "Til", "Emne"],
29
36
  # English
30
37
  ["From", "To", "Cc", "Reply-To", "Subject"],
31
38
  # French
32
- ["De", "À", "Répondre à", "Objet"],
39
+ ["De", "Expéditeur", "À", "Destinataire", "Répondre à", "Objet"],
33
40
  # German
34
41
  ["Von", "An", "Betreff"],
35
42
  # Portuguese
@@ -37,11 +44,17 @@ class EmailHeaderMatcher
37
44
  # Spanish
38
45
  ["De", "Para", "Asunto"],
39
46
  # Italian
40
- ["Da", "Risposta", "A", "Oggetto"]
47
+ ["Da", "Risposta", "A", "Oggetto"],
48
+ # Dutch
49
+ ["Van", "Beantwoorden - Aan", "Aan", "Onderwerp"],
50
+ # Swedish
51
+ ["Från", "Till", "Ämne"],
52
+ # Chinese
53
+ ["发件人", "收件人", "主题"],
41
54
  ]
42
55
 
43
56
  EMAIL_HEADERS_WITH_TEXT_REGEXES = EMAIL_HEADERS_WITH_TEXT_MARKERS.map do |header|
44
- /^[[:blank:]>\*]*(?:#{header.join("|")})[[:blank:]\*]*:.*[[:word:]]+/
57
+ /^[[:blank:]*]*(?:#{header.join("|")})[[:blank:]*]*:.*[[:word:]]+/i
45
58
  end
46
59
 
47
60
  EMAIL_HEADER_REGEXES = [
data/lib/email_reply_trimmer/embedded_email_matcher.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  class EmbeddedEmailMatcher
2
3
 
3
4
  # On Wed, Sep 25, 2013, at 03:57 PM, jorge_castro wrote:
@@ -10,38 +11,43 @@ class EmbeddedEmailMatcher
10
11
  # Dnia 14 lip 2015 o godz. 00:25 Michael Downey <info@discourse.org> napisał(a):
11
12
  # Em seg, 27 de jul de 2015 17:13, Neil Lalonde <info@discourse.org> escreveu:
12
13
  # El jueves, 21 de noviembre de 2013, codinghorror escribió:
13
- # Am 03.02.2016 3:35 nachm. schrieb Max Mustermann <mail@example.com>:
14
- ON_DATE_SOMEONE_WROTE_MARKERS = [
14
+ # At 6/16/2016 08:32 PM, you wrote:
15
+ ON_DATE_SOMEONE_WROTE_REGEXES = [
16
+ # Chinese
17
+ /^[[:blank:]<>-]*在 (?:(?!\b(?>在|写道)\b).)+?写道[[:blank:].:>-]*$/im,
15
18
  # Dutch
16
- ["Op","het volgende geschreven"],
19
+ /^[[:blank:]<>-]*Op (?:(?!\b(?>Op|het\svolgende\sgeschreven|schreef)\b).)+?(het\svolgende\sgeschreven|schreef[^:]+)[[:blank:].:>-]*$/im,
17
20
  # English
18
- ["On", "wrote"],
21
+ /^[[:blank:]<>-]*In message (?:(?!\b(?>In message|writes)\b).)+?writes[[:blank:].:>-]*$/im,
22
+ /^[[:blank:]<>-]*(On|At) (?:(?!\b(?>On|wrote|writes|says|said)\b).)+?(wrote|writes|says|said)[[:blank:].:>-]*$/im,
19
23
  # French
20
- ["Le", "a écrit "],
24
+ /^[[:blank:]<>-]*Le (?:(?!\b(?>Le|nous\sa\sdit|a\s+écrit)\b).)+?(nous\sa\sdit|a\s+écrit)[[:blank:].:>-]*$/im,
25
+ # German
26
+ /^[[:blank:]<>-]*Am (?:(?!\b(?>Am|schrieben\sSie)\b).)+?schrieben\sSie[[:blank:].:>-]*$/im,
27
+ /^[[:blank:]<>-]*Am (?:(?!\b(?>Am|geschrieben)\b).)+?(geschrieben|schrieb[^:]+)[[:blank:].:>-]*$/im,
21
28
  # Italian
22
- ["Il", "ha scritto"],
29
+ /^[[:blank:]<>-]*Il (?:(?!\b(?>Il|ha\sscritto)\b).)+?ha\sscritto[[:blank:].:>-]*$/im,
23
30
  # Polish
24
- ["Dnia", "napisał\\(a\\)"],
31
+ /^[[:blank:]<>-]*(Dnia|Dňa) (?:(?!\b(?>Dnia|Dňa|napisał)\b).)+?napisał(\(a\))?[[:blank:].:>-]*$/im,
25
32
  # Portuguese
26
- ["Em", "escreveu"],
33
+ /^[[:blank:]<>-]*Em (?:(?!\b(?>Em|escreveu)\b).)+?escreveu[[:blank:].:>-]*$/im,
27
34
  # Spanish
28
- ["El", "escribió"],
29
- # German
30
- ["Am", "schrieb"],
35
+ /^[[:blank:]<>-]*El (?:(?!\b(?>El|escribió)\b).)+?escribió[[:blank:].:>-]*$/im,
31
36
  ]
32
37
 
33
- ON_DATE_SOMEONE_WROTE_REGEXES = ON_DATE_SOMEONE_WROTE_MARKERS.map do |on, wrote|
34
- wrote.gsub!(/ +/, "[[:space:]]+") # the "wrote" part might span over multiple lines
35
- /^([[:blank:]>\-]*#{on}\s(?:(?!#{on}\s|#{wrote}:?)[\s\S])*#{wrote}:?[[:blank:]\-]*)$/m
36
- end
37
-
38
38
  # Op 10 dec. 2015 18:35 schreef "Arpit Jalan" <info@discourse.org>:
39
39
  # Am 18.09.2013 um 16:24 schrieb codinghorror <info@discourse.org>:
40
+ # Den 15. jun. 2016 kl. 20.42 skrev Jeff Atwood <info@discourse.org>:
41
+ # søn. 30. apr. 2017 kl. 00.26 skrev David Taylor <meta@discoursemail.com>:
40
42
  ON_DATE_WROTE_SOMEONE_MARKERS = [
41
43
  # Dutch
42
44
  ["Op", "schreef"],
43
45
  # German
44
46
  ["Am", "schrieb"],
47
+ # Norwegian
48
+ ["Den", "skrev"],
49
+ # Dutch
50
+ ["søn\.", "skrev"],
45
51
  ]
46
52
 
47
53
  ON_DATE_WROTE_SOMEONE_REGEXES = ON_DATE_WROTE_SOMEONE_MARKERS.map do |on, wrote|
@@ -49,15 +55,31 @@ class EmbeddedEmailMatcher
49
55
  end
50
56
 
51
57
  # суббота, 14 марта 2015 г. пользователь etewiah написал:
58
+ # 23 mar 2017 21:25 "Neil Lalonde" <meta@discoursemail.com> napisał(a):
59
+ # 30 серп. 2016 р. 20:45 "Arpit" no-reply@example.com пише:
52
60
  DATE_SOMEONE_WROTE_MARKERS = [
53
61
  # Russian
54
62
  ["пользователь", "написал"],
63
+ # Polish
64
+ ["", "napisał\\(a\\)"],
65
+ # Ukrainian
66
+ ["", "пише"],
55
67
  ]
56
68
 
57
69
  DATE_SOMEONE_WROTE_REGEXES = DATE_SOMEONE_WROTE_MARKERS.map do |user, wrote|
58
- /.+#{user}.+#{wrote}:/
70
+ user.size == 0 ?
71
+ /\d{4}.{1,80}\n?.{0,80}?#{wrote}:/ :
72
+ /\d{4}.{1,80}#{user}.{0,80}\n?.{0,80}?#{wrote}:/
59
73
  end
60
74
 
75
+ # Max Mustermann <try_discourse@discoursemail.com> schrieb am Fr., 28. Apr. 2017 um 11:53 Uhr:
76
+ SOMEONE_WROTE_ON_DATE_REGEXES = [
77
+ # English
78
+ /^.+\bwrote\b[[:space:]]+\bon\b.+[^:]+:/,
79
+ # German
80
+ /^.+\bschrieb\b[[:space:]]+\bam\b.+[^:]+:/,
81
+ ]
82
+
61
83
  # 2016-03-03 17:21 GMT+01:00 Some One
62
84
  ISO_DATE_SOMEONE_REGEX = /^[[:blank:]>]*20\d\d-\d\d-\d\d \d\d:\d\d GMT\+\d\d:\d\d [\w[:blank:]]+$/
63
85
 
@@ -65,7 +87,7 @@ class EmbeddedEmailMatcher
65
87
  # 2013/10/2 camilohollanda <info@discourse.org>
66
88
  # вт, 5 янв. 2016 г. в 23:39, Erlend Sogge Heggen <info@discourse.org>:
67
89
  # ср, 1 апр. 2015, 18:29, Denis Didkovsky <info@discourse.org>:
68
- DATE_SOMEONE_EMAIL_REGEX = /^[[:blank:]>]*.*\d{4}.+<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$/
90
+ DATE_SOMEONE_EMAIL_REGEX = /\d{4}.{1,80}\s?<[^@<>]+@[^@<>.]+\.[^@<>]+>:?$/
69
91
 
70
92
  # codinghorror via Discourse Meta wrote:
71
93
  # codinghorror via Discourse Meta <info@discourse.org> schrieb:
@@ -77,11 +99,12 @@ class EmbeddedEmailMatcher
77
99
  ]
78
100
 
79
101
  SOMEONE_VIA_SOMETHING_WROTE_REGEXES = SOMEONE_VIA_SOMETHING_WROTE_MARKERS.map do |wrote|
80
- /^[[:blank:]>]*.+ via .+ #{wrote}:?[[:blank:]]*$/
102
+ /^.+ via .+ #{wrote}:?[[:blank:]]*$/
81
103
  end
82
104
 
83
105
  # Some One <info@discourse.org> wrote:
84
- SOMEONE_EMAIL_WROTE_REGEX = /^[[:blank:]>]*.+ <.+@.+\..+> wrote:?/
106
+ # Gavin Sinclair (gsinclair@soyabean.com.au) wrote:
107
+ SOMEONE_EMAIL_WROTE_REGEX = /^.+\b[\w.+-]+@[\w.-]+\.\w{2,}\b.+wrote:?$/
85
108
 
86
109
  # Posted by mpalmer on 01/21/2016
87
110
  POSTED_BY_SOMEONE_ON_DATE_REGEX = /^[[:blank:]>]*Posted by .+ on \d{2}\/\d{2}\/\d{4}$/i
@@ -92,17 +115,21 @@ class EmbeddedEmailMatcher
92
115
  # ----- Original Message -----
93
116
  # -----Original Message-----
94
117
  # *----- Original Message -----*
118
+ # ----- Reply message -----
119
+ # ------------------ 原始邮件 ------------------
95
120
  FORWARDED_EMAIL_REGEXES = [
96
121
  # English
97
122
  /^[[:blank:]>]*Begin forwarded message:/i,
98
- /^[[:blank:]>]*Reply message/i,
99
- /^[[:blank:]>\*]*-{2,}[[:blank:]]*(Forwarded|Original) Message[[:blank:]]*-{2,}/i,
123
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*(Forwarded|Original|Reply) Message[[:blank:]]*-{2,}/i,
100
124
  # French
101
- /^[[:blank:]>\*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}/i,
125
+ /^[[:blank:]>]*Début du message transféré :/i,
126
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*Message transféré[[:blank:]]*-{2,}/i,
102
127
  # German
103
- /^[[:blank:]>\*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}/i,
128
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*Ursprüngliche Nachricht[[:blank:]]*-{2,}/i,
104
129
  # Spanish
105
- /^[[:blank:]>\*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}/i,
130
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*Mensaje original[[:blank:]]*-{2,}/i,
131
+ # Chinese
132
+ /^[[:blank:]>*]*-{2,}[[:blank:]]*原始邮件[[:blank:]]*-{2,}/i,
106
133
  ]
107
134
 
108
135
  EMBEDDED_REGEXES = [
@@ -110,6 +137,7 @@ class EmbeddedEmailMatcher
110
137
  ON_DATE_WROTE_SOMEONE_REGEXES,
111
138
  DATE_SOMEONE_WROTE_REGEXES,
112
139
  DATE_SOMEONE_EMAIL_REGEX,
140
+ SOMEONE_WROTE_ON_DATE_REGEXES,
113
141
  ISO_DATE_SOMEONE_REGEX,
114
142
  SOMEONE_VIA_SOMETHING_WROTE_REGEXES,
115
143
  SOMEONE_EMAIL_WROTE_REGEX,
data/lib/email_reply_trimmer/empty_line_matcher.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  class EmptyLineMatcher
2
3
 
3
4
  def self.match?(line)
data/lib/email_reply_trimmer/quote_matcher.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  class QuoteMatcher
2
3
 
3
4
  def self.match?(line)
data/lib/email_reply_trimmer/signature_matcher.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  class SignatureMatcher
2
3
 
3
4
  # Envoyé depuis mon iPhone
@@ -15,22 +16,36 @@ class SignatureMatcher
15
16
  # 從我的 iPhone 傳送
16
17
  SIGNATURE_REGEXES = [
17
18
  # Chinese
18
- /^[[:blank:]>]*從我的 iPhone 傳送/i,
19
+ /^[[:blank:]]*從我的 iPhone 傳送/i,
19
20
  # English
20
- /^[[:blank:]>]*[[:word:]]+ from mobile/i,
21
- /^[[:blank:]>]*[\(<]*sent (?:from|via|with|by) .+[\)>]*/i,
22
- /^[[:blank:]>]*from my .{1,20}/i, # don't match too much
21
+ /^[[:blank:]]*[[:word:]]+ from mobile/i,
22
+ /^[[:blank:]]*[\(<]*Sent (from|via|with|by) .+[\)>]*/i,
23
+ /^[[:blank:]]*From my .{1,20}/i,
24
+ /^[[:blank:]]*Get Outlook for /i,
23
25
  # French
24
- /^[[:blank:]>]*Envoyé depuis mon .+/i,
26
+ /^[[:blank:]]*Envoyé depuis (mon|Yahoo Mail)/i,
25
27
  # German
26
- /^[[:blank:]>]*Von meinem .+ gesendet/i,
27
- /^[[:blank:]>]*Diese Nachricht wurde von .+ gesendet/i,
28
+ /^[[:blank:]]*Von meinem .+ gesendet/i,
29
+ /^[[:blank:]]*Diese Nachricht wurde von .+ gesendet/i,
30
+ # Italian
31
+ /^[[:blank:]]*Inviato da /i,
32
+ # Norwegian
33
+ /^[[:blank:]]*Sendt fra min /i,
34
+ # Portuguese
35
+ /^[[:blank:]]*Enviado do meu /i,
28
36
  # Spanish
29
- /^[[:blank:]>]*Enviado desde mi .+/i,
37
+ /^[[:blank:]]*Enviado desde mi /i,
38
+ # Dutch
39
+ /^[[:blank:]]*Verzonden met /i,
40
+ /^[[:blank:]]*Verstuurd vanaf mijn /i,
41
+ # Swedish
42
+ /^[[:blank:]]*från min /i,
30
43
  ]
31
44
 
32
45
  def self.match?(line)
33
- SIGNATURE_REGEXES.any? { |r| line =~ r }
46
+ # remove any markdown links
47
+ stripped = line.gsub(/\[([^\]]+)\]\([^\)]+\)/) { $1 }
48
+ SIGNATURE_REGEXES.any? { |r| stripped =~ r }
34
49
  end
35
50
 
36
51
  end