email_reply_trimmer 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6da75911e9abd808e0f27b0fe0207fb5cd2e6a7d
4
- data.tar.gz: 0ff5da3a54857c4822579fb5f4489915558eb335
3
+ metadata.gz: 61d47c5b31f114be7a83ff4ee618716f9a448726
4
+ data.tar.gz: 3654868aad0175023394df9c5a26fe5fd70f9ad5
5
5
  SHA512:
6
- metadata.gz: 8002eea84ed92894029cd6a690405d9db0c40bd2249acaa19adcd51ba319d27c117e0e32a5a8ea0e231fadd1a85ba9283550c6e857a2e057182e9c86d3e5f823
7
- data.tar.gz: 54165dc327a4425716f62c7bc0f21bacce2b4c29462bc5d801bd73938cc74b49de20e268dbd659cd443568284b810264f10599a24f569806466e6b6db2975239
6
+ metadata.gz: 62bb96a32309ea18a3d777fc52cfa900e46c6b62e65c0a010027b01938e39ab5cb0c4f2c68b10dd1308ca2c57bbd7e2df00cc18dfc15c1c4713cdff468387ad9
7
+ data.tar.gz: 69d4a9a6868904d4e8cebdff0031e9fcba389e227eb6be2068d54e480b6ef8f09de1f52d276adc5599c0c3deec72faa68d3f96abd11b06b5ca92e57d04e581b3
@@ -1,7 +1,7 @@
1
1
  class DelimiterMatcher
2
2
 
3
- DELIMITER_CHARACTERS ||= ['-', '_', '=', '+','~', '#', '*', 'ᐧ']
4
- DELIMITER_REGEX ||= /^[[:space:]]*[#{Regexp.escape(DELIMITER_CHARACTERS.join)}]+[[:space:]]*$/
3
+ DELIMITER_CHARACTERS ||= "-_,=+~#*ᐧ"
4
+ DELIMITER_REGEX ||= /^[[:space:]]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:space:]]*$/
5
5
 
6
6
  def self.match?(line)
7
7
  line =~ DELIMITER_REGEX
@@ -6,7 +6,7 @@ require_relative "email_reply_trimmer/email_header_matcher"
6
6
  require_relative "email_reply_trimmer/quote_matcher"
7
7
 
8
8
  class EmailReplyTrimmer
9
- VERSION = "0.1.3"
9
+ VERSION = "0.1.4"
10
10
 
11
11
  DELIMITER = "d"
12
12
  EMBEDDED = "b"
@@ -17,12 +17,12 @@ class EmailReplyTrimmer
17
17
  TEXT = "t"
18
18
 
19
19
  def self.identify_line_content(line)
20
- return EMPTY if EmptyLineMatcher.match?(line)
21
- return DELIMITER if DelimiterMatcher.match?(line)
22
- return SIGNATURE if SignatureMatcher.match?(line)
23
- return EMBEDDED if EmbeddedEmailMatcher.match?(line)
24
- return EMAIL_HEADER if EmailHeaderMatcher.match?(line)
25
- return QUOTE if QuoteMatcher.match?(line)
20
+ return EMPTY if EmptyLineMatcher.match? line
21
+ return DELIMITER if DelimiterMatcher.match? line
22
+ return SIGNATURE if SignatureMatcher.match? line
23
+ return EMBEDDED if EmbeddedEmailMatcher.match? line
24
+ return EMAIL_HEADER if EmailHeaderMatcher.match? line
25
+ return QUOTE if QuoteMatcher.match? line
26
26
  return TEXT
27
27
  end
28
28
 
@@ -34,22 +34,26 @@ class EmailReplyTrimmer
34
34
 
35
35
  # fix embedded email markers that might span over multiple lines
36
36
  EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
37
- if text =~ r
38
- text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
39
- end
37
+ text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
40
38
  end
41
39
 
42
- removed = []
43
-
44
40
  # from now on, we'll work on a line-by-line basis
45
41
  lines = text.split("\n")
42
+ lines_dup = lines.dup
46
43
 
47
44
  # identify content of each lines
48
45
  pattern = lines.map { |l| identify_line_content(l) }.join
49
46
 
50
- # remove all signatures & delimiters
51
- while pattern =~ /[ds]/
52
- index = pattern =~ /[ds]/
47
+ # remove everything after the first delimiter
48
+ if pattern =~ /d/
49
+ index = pattern =~ /d/
50
+ pattern = pattern[0...index]
51
+ lines = lines[0...index]
52
+ end
53
+
54
+ # remove all mobile signatures
55
+ while pattern =~ /s/
56
+ index = pattern =~ /s/
53
57
  pattern.slice!(index)
54
58
  lines.slice!(index)
55
59
  end
@@ -58,7 +62,6 @@ class EmailReplyTrimmer
58
62
  # then take everything up to that marker
59
63
  if pattern =~ /te*b[^q]*$/
60
64
  index = pattern =~ /te*b[^q]*$/
61
- removed = lines[(index + 1)..-1]
62
65
  pattern = pattern[0..index]
63
66
  lines = lines[0..index]
64
67
  end
@@ -67,7 +70,6 @@ class EmailReplyTrimmer
67
70
  # then take everything up to that marker
68
71
  if pattern =~ /te*b[eqbh]*[te]*$/
69
72
  index = pattern =~ /te*b[eqbh]*[te]*$/
70
- removed = lines[(index + 1)..-1]
71
73
  pattern = pattern[0..index]
72
74
  lines = lines[0..index]
73
75
  end
@@ -75,8 +77,8 @@ class EmailReplyTrimmer
75
77
  # if there still are some embedded email markers, just remove them
76
78
  while pattern =~ /b/
77
79
  index = pattern =~ /b/
78
- pattern[index] = "e"
79
- lines[index] = ""
80
+ pattern.slice!(index)
81
+ lines.slice!(index)
80
82
  end
81
83
 
82
84
  # fix email headers when they span over multiple lines
@@ -90,7 +92,6 @@ class EmailReplyTrimmer
90
92
  # these headers
91
93
  if pattern =~ /t[eq]*h{3,}/
92
94
  index = pattern =~ /t[eq]*h{3,}/
93
- removed = lines[(index + 1)..-1]
94
95
  pattern = pattern[0..index]
95
96
  lines = lines[0..index]
96
97
  end
@@ -111,13 +112,32 @@ class EmailReplyTrimmer
111
112
 
112
113
  # results
113
114
  trimmed = lines.join("\n").strip
114
- elided = removed.join("\n").strip
115
115
 
116
116
  if split
117
- [trimmed, elided]
117
+ [trimmed, compute_elided(lines_dup, lines)]
118
118
  else
119
119
  trimmed
120
120
  end
121
121
  end
122
122
 
123
+ private
124
+
125
+ def self.compute_elided(text, lines)
126
+ elided = []
127
+
128
+ t = 0
129
+ l = 0
130
+
131
+ while t < text.size
132
+ while l < lines.size && text[t] == lines[l]
133
+ t += 1
134
+ l += 1
135
+ end
136
+ elided << text[t]
137
+ t += 1
138
+ end
139
+
140
+ elided.join("\n").strip
141
+ end
142
+
123
143
  end
@@ -0,0 +1,10 @@
1
+
2
+ --
3
+ ***
4
+ ####
5
+ ~~~~~
6
+ ======
7
+ _______
8
+ ++++++++
9
+
10
+ -------
@@ -1,3 +1,5 @@
1
+ ------------------------------
2
+
1
3
  *From:* Outlook user
2
4
  *Sent:* 2016-01-27
3
5
  *To:* info@discourse.org
@@ -1,3 +1,4 @@
1
+ ________________________________________
1
2
  From: Discourse <info@discourse.org.
2
3
  Sent: Thursday, 28 January 2016 8:16 p.m.
3
4
  To: Someone
@@ -8,6 +8,7 @@ Subject: VIS
8
8
  Here's an email with some very important stuff.
9
9
 
10
10
 
11
+ ________________________________
11
12
  Reply here<http://foo.bar> or hit reply from your inbox to help members by sharing your ideas.
12
13
  Mute this topic<http://42.wat> to stop getting updates, we'll send you the next one.
13
14
 
@@ -31,6 +31,7 @@ On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood <info@discourse.org> wrote:
31
31
  >
32
32
 
33
33
 
34
+ --
34
35
  Some One
35
36
  Community Manager
36
37
  foo@bar.com
@@ -1,3 +1,8 @@
1
+ Sent from Outlook Mobile<https://foo.bar>
2
+
3
+
4
+
5
+
1
6
  On Sun, Feb 7, 2016 at 12:12 AM -0800, "Arpit Jalan" <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>> wrote:
2
7
 
3
8
  Hi Some,
@@ -15,7 +20,9 @@ On Fri, 5 Feb 2016 at 10:42, Some One <foo@bar.com<mailto:foo@bar.com>> wrote:
15
20
  Arpit,
16
21
  Yes that sounds good.
17
22
 
23
+ Sent from Outlook Mobile<https://foo.bar>
18
24
 
25
+ _____________________________
19
26
  From: Arpit Jalan <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>>
20
27
  Sent: Thursday, February 4, 2016 10:05 AM
21
28
  Subject: Meta Discourse update
@@ -0,0 +1,10 @@
1
+ On Tue, 2011-03-01 at 18:02 +0530, Some One wrote:
2
+
3
+ >
4
+ > This is another part of the embedded email.
5
+ >
6
+ >
7
+
8
+
9
+ _______________________
10
+ And here's my signature.
@@ -17,3 +17,6 @@ codinghorror via Discourse Meta <info@discourse.org> schrieb:
17
17
  >
18
18
  >To unsubscribe from these emails, visit your [user
19
19
  >preferences](http://meta.discourse.org/user_preferences).
20
+
21
+ --
22
+ Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet.
@@ -23,5 +23,6 @@
23
23
  > To unsubscribe from these emails, change your [user
24
24
  > preferences](https://meta.discourse.org/my/preferences)
25
25
 
26
+ --
26
27
  Stefano Costa @stekosteko
27
28
  Editor, Journal of Open Archaeology Data
@@ -1,3 +1,6 @@
1
+ --
2
+ Łukasz Jan Niemier
3
+
1
4
  Dnia 14 lip 2015 o godz. 00:25 Michael Downey <info@discourse.org> napisał(a):
2
5
 
3
6
  >
@@ -0,0 +1 @@
1
+ On Mon, Aug 19, 2013 at 2:36 AM, SomeOne via Discourse Meta < info@discourse.org> wrote:
@@ -7,3 +7,4 @@ Asunto: [MP]Parser del email
7
7
  Visita el tema o responde a este email para publicar.
8
8
  Para no recibir m=C3=A1s notificaciones de este tema en particular, haz cli=
9
9
  c aqu=C3=AD. Para darte de baja de estos emails, cambia tus preferencias
10
+ =
@@ -0,0 +1,6 @@
1
+ ---------- Forwarded message ----------
2
+ From: Some One <foo@bar.com>
3
+ Date: Thu, Jan 28, 2016 at 4:00 PM
4
+ Subject: Some subject that
5
+ spans over 2 lines
6
+ To: infod@discourse.org
@@ -0,0 +1,26 @@
1
+ Envoyé depuis mon iPhone
2
+
3
+ Von meinem Mobilgerät gesendet
4
+ Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet.
5
+
6
+ Someone from mobile
7
+ From My Iphone 6
8
+ Sent via mobile
9
+ Sent with Airmail
10
+ Sent from Windows Mail
11
+ Sent from Mailbox
12
+ Sent from Mailbox for iPad
13
+ Sent from Yahoo Mail on Android
14
+ Sent from my TI-85
15
+ Sent from my iPhone
16
+ Sent from my iPod
17
+ Sent from my Alcatel Flash2
18
+ Sent from my mobile device
19
+ Sent from my cell, please excuse any typos.
20
+ Sent from my Samsung Galaxy s5 Octacore device
21
+ Sent from my HTC M8 Android phone. Please excuse typoze
22
+ Sent from my Windows 8 PC <http://windows.microsoft.com/consumer-preview>
23
+ <<sent by galaxy>>
24
+ (sent from a phone)
25
+ (Sent from mobile device)
26
+ 從我的 iPhone 傳送
@@ -0,0 +1,7 @@
1
+ --
2
+ Mit lieben Grüßen
3
+
4
+ John Doe
5
+ http://blog.john.doe
6
+ www.facebook.com/johndoe
7
+ Mobil: +12 345 6789 012
@@ -0,0 +1,9 @@
1
+ Mal sehen was hier mit der Signatur passiert!
2
+
3
+ --
4
+ Mit lieben Grüßen
5
+
6
+ John Doe
7
+ http://blog.john.doe
8
+ www.facebook.com/johndoe
9
+ Mobil: +12 345 6789 012
@@ -1,7 +1,7 @@
1
1
  require "minitest/autorun"
2
2
  require "email_reply_trimmer"
3
3
 
4
- class TestEmailReplyTrimmer < Minitest::Unit::TestCase
4
+ class TestEmailReplyTrimmer < Minitest::Test
5
5
 
6
6
  EMAILS = Dir["test/emails/*.txt"].map { |path| File.basename(path) }
7
7
  TRIMMED = Dir["test/trimmed/*.txt"].map { |path| File.basename(path) }
@@ -1,5 +1,4 @@
1
1
  This is a line before the embedded email.
2
-
3
2
  > Hello
4
3
  >
5
4
  > This is the embedded email.
@@ -9,11 +8,3 @@ This is some text
9
8
  after the
10
9
 
11
10
  embedded email.
12
-
13
- >
14
- > This is another part of the embedded email.
15
- >
16
- >
17
-
18
-
19
- And here's my signature.
@@ -1,3 +1 @@
1
1
  Oh, I've forgot to add. MIT
2
-
3
- Łukasz Jan Niemier
@@ -0,0 +1 @@
1
+ Mal sehen was hier mit der Signatur passiert!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: email_reply_trimmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Régis Hanol
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-31 00:00:00.000000000 Z
11
+ date: 2016-10-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: EmailReplyTrimmer is a small library to trim replies from plain text
14
14
  email.
@@ -68,6 +68,7 @@ files:
68
68
  - test/elided/signatures.txt
69
69
  - test/elided/strip.txt
70
70
  - test/elided/text_only.txt
71
+ - test/elided/usenet.txt
71
72
  - test/emails/delimiters.txt
72
73
  - test/emails/dual_embedded.txt
73
74
  - test/emails/email_headers_1.txt
@@ -107,6 +108,7 @@ files:
107
108
  - test/emails/signatures.txt
108
109
  - test/emails/strip.txt
109
110
  - test/emails/text_only.txt
111
+ - test/emails/usenet.txt
110
112
  - test/test_email_reply_trimmer.rb
111
113
  - test/trimmed/delimiters.txt
112
114
  - test/trimmed/dual_embedded.txt
@@ -147,6 +149,7 @@ files:
147
149
  - test/trimmed/signatures.txt
148
150
  - test/trimmed/strip.txt
149
151
  - test/trimmed/text_only.txt
152
+ - test/trimmed/usenet.txt
150
153
  homepage: https://github.com/discourse/email_reply_trimmer
151
154
  licenses:
152
155
  - MIT