email_reply_trimmer 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6da75911e9abd808e0f27b0fe0207fb5cd2e6a7d
4
- data.tar.gz: 0ff5da3a54857c4822579fb5f4489915558eb335
3
+ metadata.gz: 61d47c5b31f114be7a83ff4ee618716f9a448726
4
+ data.tar.gz: 3654868aad0175023394df9c5a26fe5fd70f9ad5
5
5
  SHA512:
6
- metadata.gz: 8002eea84ed92894029cd6a690405d9db0c40bd2249acaa19adcd51ba319d27c117e0e32a5a8ea0e231fadd1a85ba9283550c6e857a2e057182e9c86d3e5f823
7
- data.tar.gz: 54165dc327a4425716f62c7bc0f21bacce2b4c29462bc5d801bd73938cc74b49de20e268dbd659cd443568284b810264f10599a24f569806466e6b6db2975239
6
+ metadata.gz: 62bb96a32309ea18a3d777fc52cfa900e46c6b62e65c0a010027b01938e39ab5cb0c4f2c68b10dd1308ca2c57bbd7e2df00cc18dfc15c1c4713cdff468387ad9
7
+ data.tar.gz: 69d4a9a6868904d4e8cebdff0031e9fcba389e227eb6be2068d54e480b6ef8f09de1f52d276adc5599c0c3deec72faa68d3f96abd11b06b5ca92e57d04e581b3
@@ -1,7 +1,7 @@
1
1
  class DelimiterMatcher
2
2
 
3
- DELIMITER_CHARACTERS ||= ['-', '_', '=', '+','~', '#', '*', 'ᐧ']
4
- DELIMITER_REGEX ||= /^[[:space:]]*[#{Regexp.escape(DELIMITER_CHARACTERS.join)}]+[[:space:]]*$/
3
+ DELIMITER_CHARACTERS ||= "-_,=+~#*ᐧ"
4
+ DELIMITER_REGEX ||= /^[[:space:]]*[#{Regexp.escape(DELIMITER_CHARACTERS)}]+[[:space:]]*$/
5
5
 
6
6
  def self.match?(line)
7
7
  line =~ DELIMITER_REGEX
@@ -6,7 +6,7 @@ require_relative "email_reply_trimmer/email_header_matcher"
6
6
  require_relative "email_reply_trimmer/quote_matcher"
7
7
 
8
8
  class EmailReplyTrimmer
9
- VERSION = "0.1.3"
9
+ VERSION = "0.1.4"
10
10
 
11
11
  DELIMITER = "d"
12
12
  EMBEDDED = "b"
@@ -17,12 +17,12 @@ class EmailReplyTrimmer
17
17
  TEXT = "t"
18
18
 
19
19
  def self.identify_line_content(line)
20
- return EMPTY if EmptyLineMatcher.match?(line)
21
- return DELIMITER if DelimiterMatcher.match?(line)
22
- return SIGNATURE if SignatureMatcher.match?(line)
23
- return EMBEDDED if EmbeddedEmailMatcher.match?(line)
24
- return EMAIL_HEADER if EmailHeaderMatcher.match?(line)
25
- return QUOTE if QuoteMatcher.match?(line)
20
+ return EMPTY if EmptyLineMatcher.match? line
21
+ return DELIMITER if DelimiterMatcher.match? line
22
+ return SIGNATURE if SignatureMatcher.match? line
23
+ return EMBEDDED if EmbeddedEmailMatcher.match? line
24
+ return EMAIL_HEADER if EmailHeaderMatcher.match? line
25
+ return QUOTE if QuoteMatcher.match? line
26
26
  return TEXT
27
27
  end
28
28
 
@@ -34,22 +34,26 @@ class EmailReplyTrimmer
34
34
 
35
35
  # fix embedded email markers that might span over multiple lines
36
36
  EmbeddedEmailMatcher::ON_DATE_SOMEONE_WROTE_REGEXES.each do |r|
37
- if text =~ r
38
- text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
39
- end
37
+ text.gsub!(r) { |m| m.gsub(/\n[[:space:]>\-]*/, " ") }
40
38
  end
41
39
 
42
- removed = []
43
-
44
40
  # from now on, we'll work on a line-by-line basis
45
41
  lines = text.split("\n")
42
+ lines_dup = lines.dup
46
43
 
47
44
  # identify content of each lines
48
45
  pattern = lines.map { |l| identify_line_content(l) }.join
49
46
 
50
- # remove all signatures & delimiters
51
- while pattern =~ /[ds]/
52
- index = pattern =~ /[ds]/
47
+ # remove everything after the first delimiter
48
+ if pattern =~ /d/
49
+ index = pattern =~ /d/
50
+ pattern = pattern[0...index]
51
+ lines = lines[0...index]
52
+ end
53
+
54
+ # remove all mobile signatures
55
+ while pattern =~ /s/
56
+ index = pattern =~ /s/
53
57
  pattern.slice!(index)
54
58
  lines.slice!(index)
55
59
  end
@@ -58,7 +62,6 @@ class EmailReplyTrimmer
58
62
  # then take everything up to that marker
59
63
  if pattern =~ /te*b[^q]*$/
60
64
  index = pattern =~ /te*b[^q]*$/
61
- removed = lines[(index + 1)..-1]
62
65
  pattern = pattern[0..index]
63
66
  lines = lines[0..index]
64
67
  end
@@ -67,7 +70,6 @@ class EmailReplyTrimmer
67
70
  # then take everything up to that marker
68
71
  if pattern =~ /te*b[eqbh]*[te]*$/
69
72
  index = pattern =~ /te*b[eqbh]*[te]*$/
70
- removed = lines[(index + 1)..-1]
71
73
  pattern = pattern[0..index]
72
74
  lines = lines[0..index]
73
75
  end
@@ -75,8 +77,8 @@ class EmailReplyTrimmer
75
77
  # if there still are some embedded email markers, just remove them
76
78
  while pattern =~ /b/
77
79
  index = pattern =~ /b/
78
- pattern[index] = "e"
79
- lines[index] = ""
80
+ pattern.slice!(index)
81
+ lines.slice!(index)
80
82
  end
81
83
 
82
84
  # fix email headers when they span over multiple lines
@@ -90,7 +92,6 @@ class EmailReplyTrimmer
90
92
  # these headers
91
93
  if pattern =~ /t[eq]*h{3,}/
92
94
  index = pattern =~ /t[eq]*h{3,}/
93
- removed = lines[(index + 1)..-1]
94
95
  pattern = pattern[0..index]
95
96
  lines = lines[0..index]
96
97
  end
@@ -111,13 +112,32 @@ class EmailReplyTrimmer
111
112
 
112
113
  # results
113
114
  trimmed = lines.join("\n").strip
114
- elided = removed.join("\n").strip
115
115
 
116
116
  if split
117
- [trimmed, elided]
117
+ [trimmed, compute_elided(lines_dup, lines)]
118
118
  else
119
119
  trimmed
120
120
  end
121
121
  end
122
122
 
123
+ private
124
+
125
+ def self.compute_elided(text, lines)
126
+ elided = []
127
+
128
+ t = 0
129
+ l = 0
130
+
131
+ while t < text.size
132
+ while l < lines.size && text[t] == lines[l]
133
+ t += 1
134
+ l += 1
135
+ end
136
+ elided << text[t]
137
+ t += 1
138
+ end
139
+
140
+ elided.join("\n").strip
141
+ end
142
+
123
143
  end
@@ -0,0 +1,10 @@
1
+
2
+ --
3
+ ***
4
+ ####
5
+ ~~~~~
6
+ ======
7
+ _______
8
+ ++++++++
9
+
10
+ -------
@@ -1,3 +1,5 @@
1
+ ------------------------------
2
+
1
3
  *From:* Outlook user
2
4
  *Sent:* 2016-01-27
3
5
  *To:* info@discourse.org
@@ -1,3 +1,4 @@
1
+ ________________________________________
1
2
  From: Discourse <info@discourse.org.
2
3
  Sent: Thursday, 28 January 2016 8:16 p.m.
3
4
  To: Someone
@@ -8,6 +8,7 @@ Subject: VIS
8
8
  Here's an email with some very important stuff.
9
9
 
10
10
 
11
+ ________________________________
11
12
  Reply here<http://foo.bar> or hit reply from your inbox to help members by sharing your ideas.
12
13
  Mute this topic<http://42.wat> to stop getting updates, we'll send you the next one.
13
14
 
@@ -31,6 +31,7 @@ On Mon, Feb 1, 2016 at 6:32 PM, Jeff Atwood <info@discourse.org> wrote:
31
31
  >
32
32
 
33
33
 
34
+ --
34
35
  Some One
35
36
  Community Manager
36
37
  foo@bar.com
@@ -1,3 +1,8 @@
1
+ Sent from Outlook Mobile<https://foo.bar>
2
+
3
+
4
+
5
+
1
6
  On Sun, Feb 7, 2016 at 12:12 AM -0800, "Arpit Jalan" <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>> wrote:
2
7
 
3
8
  Hi Some,
@@ -15,7 +20,9 @@ On Fri, 5 Feb 2016 at 10:42, Some One <foo@bar.com<mailto:foo@bar.com>> wrote:
15
20
  Arpit,
16
21
  Yes that sounds good.
17
22
 
23
+ Sent from Outlook Mobile<https://foo.bar>
18
24
 
25
+ _____________________________
19
26
  From: Arpit Jalan <arpit.jalan@discourse.org<mailto:arpit.jalan@discourse.org>>
20
27
  Sent: Thursday, February 4, 2016 10:05 AM
21
28
  Subject: Meta Discourse update
@@ -0,0 +1,10 @@
1
+ On Tue, 2011-03-01 at 18:02 +0530, Some One wrote:
2
+
3
+ >
4
+ > This is another part of the embedded email.
5
+ >
6
+ >
7
+
8
+
9
+ _______________________
10
+ And here's my signature.
@@ -17,3 +17,6 @@ codinghorror via Discourse Meta <info@discourse.org> schrieb:
17
17
  >
18
18
  >To unsubscribe from these emails, visit your [user
19
19
  >preferences](http://meta.discourse.org/user_preferences).
20
+
21
+ --
22
+ Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet.
@@ -23,5 +23,6 @@
23
23
  > To unsubscribe from these emails, change your [user
24
24
  > preferences](https://meta.discourse.org/my/preferences)
25
25
 
26
+ --
26
27
  Stefano Costa @stekosteko
27
28
  Editor, Journal of Open Archaeology Data
@@ -1,3 +1,6 @@
1
+ --
2
+ Łukasz Jan Niemier
3
+
1
4
  Dnia 14 lip 2015 o godz. 00:25 Michael Downey <info@discourse.org> napisał(a):
2
5
 
3
6
  >
@@ -0,0 +1 @@
1
+ On Mon, Aug 19, 2013 at 2:36 AM, SomeOne via Discourse Meta < info@discourse.org> wrote:
@@ -7,3 +7,4 @@ Asunto: [MP]Parser del email
7
7
  Visita el tema o responde a este email para publicar.
8
8
  Para no recibir m=C3=A1s notificaciones de este tema en particular, haz cli=
9
9
  c aqu=C3=AD. Para darte de baja de estos emails, cambia tus preferencias
10
+ =
@@ -0,0 +1,6 @@
1
+ ---------- Forwarded message ----------
2
+ From: Some One <foo@bar.com>
3
+ Date: Thu, Jan 28, 2016 at 4:00 PM
4
+ Subject: Some subject that
5
+ spans over 2 lines
6
+ To: infod@discourse.org
@@ -0,0 +1,26 @@
1
+ Envoyé depuis mon iPhone
2
+
3
+ Von meinem Mobilgerät gesendet
4
+ Diese Nachricht wurde von meinem Android-Mobiltelefon mit K-9 Mail gesendet.
5
+
6
+ Someone from mobile
7
+ From My Iphone 6
8
+ Sent via mobile
9
+ Sent with Airmail
10
+ Sent from Windows Mail
11
+ Sent from Mailbox
12
+ Sent from Mailbox for iPad
13
+ Sent from Yahoo Mail on Android
14
+ Sent from my TI-85
15
+ Sent from my iPhone
16
+ Sent from my iPod
17
+ Sent from my Alcatel Flash2
18
+ Sent from my mobile device
19
+ Sent from my cell, please excuse any typos.
20
+ Sent from my Samsung Galaxy s5 Octacore device
21
+ Sent from my HTC M8 Android phone. Please excuse typoze
22
+ Sent from my Windows 8 PC <http://windows.microsoft.com/consumer-preview>
23
+ <<sent by galaxy>>
24
+ (sent from a phone)
25
+ (Sent from mobile device)
26
+ 從我的 iPhone 傳送
@@ -0,0 +1,7 @@
1
+ --
2
+ Mit lieben Grüßen
3
+
4
+ John Doe
5
+ http://blog.john.doe
6
+ www.facebook.com/johndoe
7
+ Mobil: +12 345 6789 012
@@ -0,0 +1,9 @@
1
+ Mal sehen was hier mit der Signatur passiert!
2
+
3
+ --
4
+ Mit lieben Grüßen
5
+
6
+ John Doe
7
+ http://blog.john.doe
8
+ www.facebook.com/johndoe
9
+ Mobil: +12 345 6789 012
@@ -1,7 +1,7 @@
1
1
  require "minitest/autorun"
2
2
  require "email_reply_trimmer"
3
3
 
4
- class TestEmailReplyTrimmer < Minitest::Unit::TestCase
4
+ class TestEmailReplyTrimmer < Minitest::Test
5
5
 
6
6
  EMAILS = Dir["test/emails/*.txt"].map { |path| File.basename(path) }
7
7
  TRIMMED = Dir["test/trimmed/*.txt"].map { |path| File.basename(path) }
@@ -1,5 +1,4 @@
1
1
  This is a line before the embedded email.
2
-
3
2
  > Hello
4
3
  >
5
4
  > This is the embedded email.
@@ -9,11 +8,3 @@ This is some text
9
8
  after the
10
9
 
11
10
  embedded email.
12
-
13
- >
14
- > This is another part of the embedded email.
15
- >
16
- >
17
-
18
-
19
- And here's my signature.
@@ -1,3 +1 @@
1
1
  Oh, I've forgot to add. MIT
2
-
3
- Łukasz Jan Niemier
@@ -0,0 +1 @@
1
+ Mal sehen was hier mit der Signatur passiert!
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: email_reply_trimmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Régis Hanol
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-31 00:00:00.000000000 Z
11
+ date: 2016-10-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: EmailReplyTrimmer is a small library to trim replies from plain text
14
14
  email.
@@ -68,6 +68,7 @@ files:
68
68
  - test/elided/signatures.txt
69
69
  - test/elided/strip.txt
70
70
  - test/elided/text_only.txt
71
+ - test/elided/usenet.txt
71
72
  - test/emails/delimiters.txt
72
73
  - test/emails/dual_embedded.txt
73
74
  - test/emails/email_headers_1.txt
@@ -107,6 +108,7 @@ files:
107
108
  - test/emails/signatures.txt
108
109
  - test/emails/strip.txt
109
110
  - test/emails/text_only.txt
111
+ - test/emails/usenet.txt
110
112
  - test/test_email_reply_trimmer.rb
111
113
  - test/trimmed/delimiters.txt
112
114
  - test/trimmed/dual_embedded.txt
@@ -147,6 +149,7 @@ files:
147
149
  - test/trimmed/signatures.txt
148
150
  - test/trimmed/strip.txt
149
151
  - test/trimmed/text_only.txt
152
+ - test/trimmed/usenet.txt
150
153
  homepage: https://github.com/discourse/email_reply_trimmer
151
154
  licenses:
152
155
  - MIT