email_reply_parser 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/email_reply_parser.gemspec +1 -0
- data/lib/email_reply_parser.rb +11 -4
- data/test/email_reply_parser_test.rb +6 -0
- data/test/emails/pathological.txt +20 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7ac6a5b1b44a4c6b58ec03ad6bca58d2bbfca5ab
|
4
|
+
data.tar.gz: a4c485aea71c6fdc5b221c80d7b37189ee7185ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8409fbf234b0c3c8f63ba6cae894f9301b588ca065ce69e802278ef6083f31599698d61f83555e2cd4194abfad50c223358fb5994ccc4315f08a6bb5e879b5eb
|
7
|
+
data.tar.gz: 86c7e2e2ea27d775dc530503687e9ef116d97d612736ec13e08c4077eb631e5e76179a3d739ab6a6e07a8c41d1520ea35cb435c750c2fefd8b17392601914e7d
|
data/email_reply_parser.gemspec
CHANGED
data/lib/email_reply_parser.rb
CHANGED
@@ -30,7 +30,7 @@ require 'strscan'
|
|
30
30
|
#
|
31
31
|
# [mail]: https://github.com/mikel/mail
|
32
32
|
class EmailReplyParser
|
33
|
-
VERSION = "0.5.4"
|
33
|
+
VERSION = "0.5.5"
|
34
34
|
|
35
35
|
# Public: Splits an email body into a list of Fragments.
|
36
36
|
#
|
@@ -127,7 +127,14 @@ class EmailReplyParser
|
|
127
127
|
|
128
128
|
private
|
129
129
|
EMPTY = "".freeze
|
130
|
-
|
130
|
+
SIGNATURE = '(?m)(--|__|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)'
|
131
|
+
|
132
|
+
begin
|
133
|
+
require 're2'
|
134
|
+
SIG_REGEX = RE2::Regexp.new(SIGNATURE)
|
135
|
+
rescue LoadError
|
136
|
+
SIG_REGEX = Regexp.new(SIGNATURE)
|
137
|
+
end
|
131
138
|
|
132
139
|
### Line-by-Line Parsing
|
133
140
|
|
@@ -139,7 +146,7 @@ class EmailReplyParser
|
|
139
146
|
# Returns nothing.
|
140
147
|
def scan_line(line)
|
141
148
|
line.chomp!("\n")
|
142
|
-
line.lstrip! unless line
|
149
|
+
line.lstrip! unless SIG_REGEX.match(line)
|
143
150
|
|
144
151
|
# We're looking for leading `>`'s to see if this line is part of a
|
145
152
|
# quoted Fragment.
|
@@ -148,7 +155,7 @@ class EmailReplyParser
|
|
148
155
|
# Mark the current Fragment as a signature if the current line is empty
|
149
156
|
# and the Fragment starts with a common signature indicator.
|
150
157
|
if @fragment && line == EMPTY
|
151
|
-
if @fragment.lines.last
|
158
|
+
if SIG_REGEX.match @fragment.lines.last
|
152
159
|
@fragment.signature = true
|
153
160
|
finish_fragment
|
154
161
|
end
|
@@ -156,6 +156,12 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
156
156
|
assert_match /^On Oct 1, 2012/, reply.fragments[1].to_s
|
157
157
|
end
|
158
158
|
|
159
|
+
def test_pathological_emails
|
160
|
+
t0 = Time.now
|
161
|
+
reply = email("pathological")
|
162
|
+
assert (Time.now - t0) < 1, "Took too long, upgrade to re2 gem."
|
163
|
+
end
|
164
|
+
|
159
165
|
def email(name)
|
160
166
|
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
161
167
|
EmailReplyParser.read body
|
@@ -0,0 +1,20 @@
|
|
1
|
+
I think you're onto something. I will try to fix the problem as soon as I
|
2
|
+
get back to a computer.
|
3
|
+
On Dec 8, 2013 2:10 PM, "John Sullivan" <notifications@github.com> wrote:
|
4
|
+
|
5
|
+
> I think your code is shortening the reference sequence you return to be
|
6
|
+
> the same size as the query sequence, and we end up losing data. Here's some
|
7
|
+
> debugging output from me putzing around...
|
8
|
+
>
|
9
|
+
> name: gi|253409428|ref|GQ227366.1| Influenza A virus (A/pika/Qinghai/BI/2007(H5N1)) segment 1 polymerase PB2 (PB2) gene, complete cds
|
10
|
+
> score: 39.0
|
11
|
+
>
|
12
|
+
> organism.sequence: ATGGAGAGAATAAAGGAATTAAGAGATCTAATGTCACAGTCCCGCACTCGCGAGATACTAACAAAGACCACTGTGGACCATATGGCCATAATCAAGAAATACACATCAGGAAGACAAGAGAAGAACCCTGCTCTCAGAATGAAATGGATGATGGCAATGAAATATCCAATCACAGCGGACAAGAGAATAATAGAGATGATTCCTGAAAGGAATGAACAAGGACAGACACTCTGGAGCAAGACAAATGATGCTGGATCGGACAGGGTGATGGTGTCTCCCCTAGCTGTAACTTGGTGGAATAGGAATGGGCCGACGACAAGTACAGTTCATTATCCAAAGGTTTACAAAACATACTTTGAGAAGGTTGAAAGGTTAAAACATGGAACCTTCGGTCCCGTTCATTTCCGAAACCAAGTTAAAATACGCCGCCGAGTTGATACAAATCCTGGCCATGCAGATCTCAGTGCTAAAGAAGCACAAGATGTCATCATGGAGGTCGTTTTCCCAAATGAAGTGGGAGCTAGAATATTGACTTCAGAGTCACAGTTGACAATAACGAAAGAGAAAAAAGAAGAGCTCCAAGATTGTAAGATTGCTCCCTTAATGGTTGCATACATGTTGGAAAGGGAACTGGTCCGCAAAACCAGATTCCTACCAGTAGCAGGCGGAACAAGCAGTGTGTACATTGAGGTATTGCATTTGACTCAAGGAACCTGCTGGGCACAGATGTACACTCCAGGCGGAGAAGTAAGAAATGACGATGTTGACCAGAGTTTGATCATTGCTGCCAGAAACATTGTTAGGAGAGCAACGGTATCAGCGGATCCACTGGCATCACTGCTGGAGATGTGTCACAGCACACAAATTGGTGGGATAAGGATGGTGGACATCCTTAGGCAAACTCCAACTGAGGAACAAGCTGTGGATATATGCAAAGCAGCAATGGGTCTGAGGATTAGTTCATCCTTTAGCTTTGGAG
|
13
|
+
> GCTTCACTTTCAAAAGAACAAGTGGATCATCCGCCACGAAGGAAGAGGAAGTGCTTACAGGCAACCTCCAAACATTGAAAATAAGAGTACATGAGGGGTATGAGGAGTTCACAATGGTTGGGCAGAGGGCAACAGCTATCCTGAGGAAAGCAACTAGAAGGCTGATTCAGTTGATAGTAAGTGGAAGAAACGAACAATCAATCGCTGAGGCAATCATTGTAGCAATGGTGTTCTCACAGGAGGATCGCATGATAAAAGCAGTCCGAGGCGATCTGAATTTCGTAAACAGAGCAAACCAAAGATTAAACCCCATGCATCAACTCCTGAGACATTTTCAAAAGGACGCAAAAGTGCTATTTCAGAATTGGGGAACTGAGCCAATTGATAATGTCATGGGGATGATCGGAATATTACCTGACATGACTCCCAGCACAGAAACGTCACTGAGAGGAGTGAGAGTTAGTAAAATGGGAGTAGATGAGTATTCCAGCACTGAGAGAGTAGTTGTAAGCATTGACCGCTTCTTAAGGGTTCGAGACCAGCGGGGGAACGTACTCTTATCTCCCGAAGAGGTCAGCGAAACCCAGGGAACAGAGAAGTTGACAATAACATATTCATCATCAATGATGTGGGAAATCAACGGTCCTGAGTCAGTGCTTGTTAACACTTACCAATGGATCATTAGAAACTGGGAGACCGTGAAAATTCAGTGGTCTCAGGACCCCACGATGTTGTACAATAAGATGGAGTTTGAACCGTTCCAATCCTTGGTACCTAAAGCTGCCAGAGGTCAATACAGTGGATTTGTGAGAACATTATTCCAACAAATGCGTGACGTACTGGGGACATTTGATACTGTCCAGATAATAAAGCTGCTACCATTTGCAGCAGCCCCACCGAAGCAGAGCAGAATGCAGTTTTCTTCTCTAACTGTGAATGTGAGAGGCTCAGGAATGAGAATACTCATAAGGGGCAATTCCCCTGTGTTCAACTACAA
|
14
|
+
> TAAGGCAACCCAAAGACTTACCGTTCTTGGAAAGGACGCAGGTGCATTAACAGAGGATCCAGATGAGGGGACAGCCGGAGTGGAATCTGCAGTACTGAGGGGGTTCCTAATTCTAGGCAAGGAGGACAAAAGATATGGACCAGCATTGAGCATCAATGAACTGAGCAATCTTGCAAAAGGGGAGAAAGCTAATGTGCTGATAGGGCAAGGAGACGTGGTGTTGGTAATGAAACGGAAACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAGTGTCGAATTGTTTAAAAACGACCTTGTTTCTACT
|
15
|
+
> reference_alignment: ________________________________________________
|
16
|
+
>
|
17
|
+
> query: AGCGAAAGCAGGTCAAATATATTCAATATGGAGAGAATAAAAGAATTAAG
|
18
|
+
>
|
19
|
+
> query_alignment: GCGAAAGCAGGTCAAATATATTCAATATGGAGAGAATAAAAGAATTAAG
|
20
|
+
>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.4
|
4
|
+
version: 0.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rick Olson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Long description. Maybe copied from the README.
|
14
14
|
email: technoweenie@gmail.com
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
40
40
|
- test/emails/email_one_is_not_on.txt
|
41
41
|
- test/emails/email_sent_from_my_not_signature.txt
|
42
|
+
- test/emails/pathological.txt
|
42
43
|
homepage: http://github.com/github/email_reply_parser
|
43
44
|
licenses: []
|
44
45
|
metadata: {}
|