email_reply_parser 0.5.4 → 0.5.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/email_reply_parser.gemspec +1 -0
- data/lib/email_reply_parser.rb +11 -4
- data/test/email_reply_parser_test.rb +6 -0
- data/test/emails/pathological.txt +20 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7ac6a5b1b44a4c6b58ec03ad6bca58d2bbfca5ab
|
4
|
+
data.tar.gz: a4c485aea71c6fdc5b221c80d7b37189ee7185ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8409fbf234b0c3c8f63ba6cae894f9301b588ca065ce69e802278ef6083f31599698d61f83555e2cd4194abfad50c223358fb5994ccc4315f08a6bb5e879b5eb
|
7
|
+
data.tar.gz: 86c7e2e2ea27d775dc530503687e9ef116d97d612736ec13e08c4077eb631e5e76179a3d739ab6a6e07a8c41d1520ea35cb435c750c2fefd8b17392601914e7d
|
data/email_reply_parser.gemspec
CHANGED
data/lib/email_reply_parser.rb
CHANGED
@@ -30,7 +30,7 @@ require 'strscan'
|
|
30
30
|
#
|
31
31
|
# [mail]: https://github.com/mikel/mail
|
32
32
|
class EmailReplyParser
|
33
|
-
VERSION = "0.5.4"
|
33
|
+
VERSION = "0.5.5"
|
34
34
|
|
35
35
|
# Public: Splits an email body into a list of Fragments.
|
36
36
|
#
|
@@ -127,7 +127,14 @@ class EmailReplyParser
|
|
127
127
|
|
128
128
|
private
|
129
129
|
EMPTY = "".freeze
|
130
|
-
|
130
|
+
SIGNATURE = '(?m)(--|__|\w-$)|(^(\w+\s*){1,3} ym morf tneS$)'
|
131
|
+
|
132
|
+
begin
|
133
|
+
require 're2'
|
134
|
+
SIG_REGEX = RE2::Regexp.new(SIGNATURE)
|
135
|
+
rescue LoadError
|
136
|
+
SIG_REGEX = Regexp.new(SIGNATURE)
|
137
|
+
end
|
131
138
|
|
132
139
|
### Line-by-Line Parsing
|
133
140
|
|
@@ -139,7 +146,7 @@ class EmailReplyParser
|
|
139
146
|
# Returns nothing.
|
140
147
|
def scan_line(line)
|
141
148
|
line.chomp!("\n")
|
142
|
-
line.lstrip! unless line
|
149
|
+
line.lstrip! unless SIG_REGEX.match(line)
|
143
150
|
|
144
151
|
# We're looking for leading `>`'s to see if this line is part of a
|
145
152
|
# quoted Fragment.
|
@@ -148,7 +155,7 @@ class EmailReplyParser
|
|
148
155
|
# Mark the current Fragment as a signature if the current line is empty
|
149
156
|
# and the Fragment starts with a common signature indicator.
|
150
157
|
if @fragment && line == EMPTY
|
151
|
-
if @fragment.lines.last
|
158
|
+
if SIG_REGEX.match @fragment.lines.last
|
152
159
|
@fragment.signature = true
|
153
160
|
finish_fragment
|
154
161
|
end
|
@@ -156,6 +156,12 @@ I am currently using the Java HTTP API.\n", reply.fragments[0].to_s
|
|
156
156
|
assert_match /^On Oct 1, 2012/, reply.fragments[1].to_s
|
157
157
|
end
|
158
158
|
|
159
|
+
def test_pathological_emails
|
160
|
+
t0 = Time.now
|
161
|
+
reply = email("pathological")
|
162
|
+
assert (Time.now - t0) < 1, "Took too long, upgrade to re2 gem."
|
163
|
+
end
|
164
|
+
|
159
165
|
def email(name)
|
160
166
|
body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
|
161
167
|
EmailReplyParser.read body
|
@@ -0,0 +1,20 @@
|
|
1
|
+
I think you're onto something. I will try to fix the problem as soon as I
|
2
|
+
get back to a computer.
|
3
|
+
On Dec 8, 2013 2:10 PM, "John Sullivan" <notifications@github.com> wrote:
|
4
|
+
|
5
|
+
> I think your code is shortening the reference sequence you return to be
|
6
|
+
> the same size as the query sequence, and we end up losing data. Here's some
|
7
|
+
> debugging output from me putzing around...
|
8
|
+
>
|
9
|
+
> name: gi|253409428|ref|GQ227366.1| Influenza A virus (A/pika/Qinghai/BI/2007(H5N1)) segment 1 polymerase PB2 (PB2) gene, complete cds
|
10
|
+
> score: 39.0
|
11
|
+
>
|
12
|
+
> organism.sequence: ATGGAGAGAATAAAGGAATTAAGAGATCTAATGTCACAGTCCCGCACTCGCGAGATACTAACAAAGACCACTGTGGACCATATGGCCATAATCAAGAAATACACATCAGGAAGACAAGAGAAGAACCCTGCTCTCAGAATGAAATGGATGATGGCAATGAAATATCCAATCACAGCGGACAAGAGAATAATAGAGATGATTCCTGAAAGGAATGAACAAGGACAGACACTCTGGAGCAAGACAAATGATGCTGGATCGGACAGGGTGATGGTGTCTCCCCTAGCTGTAACTTGGTGGAATAGGAATGGGCCGACGACAAGTACAGTTCATTATCCAAAGGTTTACAAAACATACTTTGAGAAGGTTGAAAGGTTAAAACATGGAACCTTCGGTCCCGTTCATTTCCGAAACCAAGTTAAAATACGCCGCCGAGTTGATACAAATCCTGGCCATGCAGATCTCAGTGCTAAAGAAGCACAAGATGTCATCATGGAGGTCGTTTTCCCAAATGAAGTGGGAGCTAGAATATTGACTTCAGAGTCACAGTTGACAATAACGAAAGAGAAAAAAGAAGAGCTCCAAGATTGTAAGATTGCTCCCTTAATGGTTGCATACATGTTGGAAAGGGAACTGGTCCGCAAAACCAGATTCCTACCAGTAGCAGGCGGAACAAGCAGTGTGTACATTGAGGTATTGCATTTGACTCAAGGAACCTGCTGGGCACAGATGTACACTCCAGGCGGAGAAGTAAGAAATGACGATGTTGACCAGAGTTTGATCATTGCTGCCAGAAACATTGTTAGGAGAGCAACGGTATCAGCGGATCCACTGGCATCACTGCTGGAGATGTGTCACAGCACACAAATTGGTGGGATAAGGATGGTGGACATCCTTAGGCAAACTCCAACTGAGGAACAAGCTGTGGATATATGCAAAGCAGCAATGGGTCTGAGGATTAGTTCATCCTTTAGCTTTGGAG
|
13
|
+
> GCTTCACTTTCAAAAGAACAAGTGGATCATCCGCCACGAAGGAAGAGGAAGTGCTTACAGGCAACCTCCAAACATTGAAAATAAGAGTACATGAGGGGTATGAGGAGTTCACAATGGTTGGGCAGAGGGCAACAGCTATCCTGAGGAAAGCAACTAGAAGGCTGATTCAGTTGATAGTAAGTGGAAGAAACGAACAATCAATCGCTGAGGCAATCATTGTAGCAATGGTGTTCTCACAGGAGGATCGCATGATAAAAGCAGTCCGAGGCGATCTGAATTTCGTAAACAGAGCAAACCAAAGATTAAACCCCATGCATCAACTCCTGAGACATTTTCAAAAGGACGCAAAAGTGCTATTTCAGAATTGGGGAACTGAGCCAATTGATAATGTCATGGGGATGATCGGAATATTACCTGACATGACTCCCAGCACAGAAACGTCACTGAGAGGAGTGAGAGTTAGTAAAATGGGAGTAGATGAGTATTCCAGCACTGAGAGAGTAGTTGTAAGCATTGACCGCTTCTTAAGGGTTCGAGACCAGCGGGGGAACGTACTCTTATCTCCCGAAGAGGTCAGCGAAACCCAGGGAACAGAGAAGTTGACAATAACATATTCATCATCAATGATGTGGGAAATCAACGGTCCTGAGTCAGTGCTTGTTAACACTTACCAATGGATCATTAGAAACTGGGAGACCGTGAAAATTCAGTGGTCTCAGGACCCCACGATGTTGTACAATAAGATGGAGTTTGAACCGTTCCAATCCTTGGTACCTAAAGCTGCCAGAGGTCAATACAGTGGATTTGTGAGAACATTATTCCAACAAATGCGTGACGTACTGGGGACATTTGATACTGTCCAGATAATAAAGCTGCTACCATTTGCAGCAGCCCCACCGAAGCAGAGCAGAATGCAGTTTTCTTCTCTAACTGTGAATGTGAGAGGCTCAGGAATGAGAATACTCATAAGGGGCAATTCCCCTGTGTTCAACTACAA
|
14
|
+
> TAAGGCAACCCAAAGACTTACCGTTCTTGGAAAGGACGCAGGTGCATTAACAGAGGATCCAGATGAGGGGACAGCCGGAGTGGAATCTGCAGTACTGAGGGGGTTCCTAATTCTAGGCAAGGAGGACAAAAGATATGGACCAGCATTGAGCATCAATGAACTGAGCAATCTTGCAAAAGGGGAGAAAGCTAATGTGCTGATAGGGCAAGGAGACGTGGTGTTGGTAATGAAACGGAAACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAGTGTCGAATTGTTTAAAAACGACCTTGTTTCTACT
|
15
|
+
> reference_alignment: ________________________________________________
|
16
|
+
>
|
17
|
+
> query: AGCGAAAGCAGGTCAAATATATTCAATATGGAGAGAATAAAAGAATTAAG
|
18
|
+
>
|
19
|
+
> query_alignment: GCGAAAGCAGGTCAAATATATTCAATATGGAGAGAATAAAAGAATTAAG
|
20
|
+
>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: email_reply_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.4
|
4
|
+
version: 0.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rick Olson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Long description. Maybe copied from the README.
|
14
14
|
email: technoweenie@gmail.com
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- test/emails/email_multi_word_sent_from_my_mobile_device.txt
|
40
40
|
- test/emails/email_one_is_not_on.txt
|
41
41
|
- test/emails/email_sent_from_my_not_signature.txt
|
42
|
+
- test/emails/pathological.txt
|
42
43
|
homepage: http://github.com/github/email_reply_parser
|
43
44
|
licenses: []
|
44
45
|
metadata: {}
|