pipio 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.rspec +2 -0
- data/.simplecov +5 -0
- data/.travis.yml +12 -0
- data/Gemfile +3 -0
- data/LICENSE +20 -0
- data/NEWS.md +10 -0
- data/README.md +88 -0
- data/Rakefile +13 -0
- data/lib/pipio.rb +34 -0
- data/lib/pipio/alias_registry.rb +26 -0
- data/lib/pipio/chat.rb +39 -0
- data/lib/pipio/cleaners/html_cleaner.rb +95 -0
- data/lib/pipio/cleaners/text_cleaner.rb +15 -0
- data/lib/pipio/file_reader.rb +29 -0
- data/lib/pipio/message_creators/auto_or_xml_message_creator.rb +25 -0
- data/lib/pipio/message_creators/event_message_creator.rb +47 -0
- data/lib/pipio/message_creators/status_message_creator.rb +19 -0
- data/lib/pipio/messages/auto_reply_message.rb +7 -0
- data/lib/pipio/messages/event.rb +67 -0
- data/lib/pipio/messages/message.rb +23 -0
- data/lib/pipio/messages/status_message.rb +26 -0
- data/lib/pipio/messages/xml_message.rb +43 -0
- data/lib/pipio/metadata.rb +34 -0
- data/lib/pipio/metadata_parser.rb +55 -0
- data/lib/pipio/parser_factory.rb +32 -0
- data/lib/pipio/parsers/basic_parser.rb +83 -0
- data/lib/pipio/parsers/html_log_parser.rb +22 -0
- data/lib/pipio/parsers/null_parser.rb +9 -0
- data/lib/pipio/parsers/text_log_parser.rb +21 -0
- data/lib/pipio/tag_balancer.rb +163 -0
- data/lib/pipio/time_parser.rb +36 -0
- data/lib/pipio/version.rb +3 -0
- data/pipio.gemspec +27 -0
- data/spec/pipio/alias_registry_spec.rb +37 -0
- data/spec/pipio/chat_spec.rb +66 -0
- data/spec/pipio/cleaners/html_cleaner_spec.rb +102 -0
- data/spec/pipio/cleaners/text_cleaner_spec.rb +29 -0
- data/spec/pipio/file_reader_spec.rb +130 -0
- data/spec/pipio/messages/auto_reply_message_spec.rb +40 -0
- data/spec/pipio/messages/event_spec.rb +41 -0
- data/spec/pipio/messages/status_message_spec.rb +43 -0
- data/spec/pipio/messages/xml_message_spec.rb +55 -0
- data/spec/pipio/metadata_parser_spec.rb +81 -0
- data/spec/pipio/metadata_spec.rb +72 -0
- data/spec/pipio/parser_factory_spec.rb +31 -0
- data/spec/pipio/parsers/html_log_parser_spec.rb +160 -0
- data/spec/pipio/parsers/null_parser_spec.rb +13 -0
- data/spec/pipio/parsers/text_log_parser_spec.rb +37 -0
- data/spec/pipio/tag_balancer_spec.rb +16 -0
- data/spec/pipio/time_parser_spec.rb +66 -0
- data/spec/pipio_spec.rb +63 -0
- data/spec/spec_helper.rb +18 -0
- data/spec/support/chat_builder.rb +29 -0
- data/spec/support/chat_builder_helpers.rb +41 -0
- data/spec/support/file_builder.rb +22 -0
- data/spec/support/html_chat_builder.rb +67 -0
- data/spec/support/logfiles/2006-12-21.223606.txt +3 -0
- data/spec/support/logfiles/2008-01-15.071445-0500PST.htm +5 -0
- data/spec/support/logfiles/2008-01-15.071445-0500PST.html +5 -0
- data/spec/support/text_chat_builder.rb +21 -0
- data/spec/test-output/README.md +1 -0
- data/spec/test-output/html_log_output.xml +6 -0
- data/spec/test-output/text_log_output.xml +4 -0
- metadata +193 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
module Pipio
  # Parser for HTML-formatted log files.
  #
  # Builds the two line-matching regular expressions for the HTML format and
  # hands them, together with the HTML cleaner, to a BasicParser, which does
  # the actual parsing.
  class HtmlLogParser
    # Matches a parenthesized timestamp, e.g. "(2008-01-15 07:14:45)" or
    # "(7:14:45 AM)". The date part and the AM/PM suffix are both optional.
    TIMESTAMP_REGEX = /\((?<timestamp>(?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)/

    # source_file_path - path of the log file; passed through to BasicParser.
    # user_aliases     - the user's aliases; passed through to BasicParser.
    def initialize(source_file_path, user_aliases)
      # line_regex matches a line in an HTML log file other than the first.
      # The auto-reply marker is accepted both entity-escaped
      # ("&lt;AUTO-REPLY&gt;", how it appears inside HTML markup) and as the
      # literal "<AUTO-REPLY>", so it is never swallowed into sn_or_alias.
      line_regex = /#{TIMESTAMP_REGEX} ?<b>(?<sn_or_alias>.+?) ?(?<auto_reply>(?:<|&lt;)AUTO-REPLY(?:>|&gt;))?:?<\/b> ?(?<body>.+)<br ?\/>/o

      # line_regex_status matches a status or event line.
      line_regex_status = /#{TIMESTAMP_REGEX} ?<b> (?<body>.+)<\/b><br ?\/>/o

      cleaner = Cleaners::HtmlCleaner

      @parser = BasicParser.new(source_file_path, user_aliases, line_regex,
        line_regex_status, cleaner)
    end

    # Delegates to the underlying BasicParser and returns its result.
    def parse
      @parser.parse
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Pipio
  # Parser for plain-text log files.
  #
  # Supplies the text-format regular expressions and the text cleaner to a
  # BasicParser, which does the actual parsing.
  class TextLogParser
    # Source of a pattern matching a parenthesized time such as "(07:14:45)".
    # Kept as a String; it is interpolated into the regexes below.
    TIMESTAMP_REGEX = '\((?<timestamp>\d{1,2}:\d{1,2}:\d{1,2})\)'

    # source_file_path - path of the log file; passed through to BasicParser.
    # user_aliases     - the user's aliases; passed through to BasicParser.
    def initialize(source_file_path, user_aliases)
      @parser = BasicParser.new(
        source_file_path,
        user_aliases,
        # A line in a text log file other than the first:
        # timestamp, sender, optional auto-reply marker, then the body.
        /#{TIMESTAMP_REGEX} (?<sn_or_alias>.*?) ?(?<auto_reply><AUTO-REPLY>)?: (?<body>.*)/o,
        # A status or event line: timestamp followed by colon-free text.
        /#{TIMESTAMP_REGEX} (?<body>[^:]+)/o,
        Cleaners::TextCleaner
      )
    end

    # Delegates to the underlying BasicParser and returns its result.
    def parse
      @parser.parse
    end
  end
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
module Pipio
  # Balances the HTML tags of a string using a modified stack: tags left open
  # are closed, closing tags with nothing to close are dropped, and known
  # single-entity tags are made self-closing.
  #
  # The text given to the constructor is not modified; #balance works on a
  # copy and returns a new string:
  #
  #   text = TagBalancer.new(text).balance
  #
  # From Wordpress's formatting.php; rewritten in Ruby by Gabe
  # Berke-Williams, 2009.
  # Author:: Leonard Lin <leonard@acm.org>
  # License:: GPL v2.0
  # Copyright:: November 4, 2001
  class TagBalancer
    # text - the String whose tags should be balanced.
    def initialize(text)
      @text = text

      # Names of the currently open tags, innermost last.
      @tagstack = []
      # Markup held back until the start of the next loop iteration, so that
      # queued closing tags are emitted before the tag that triggered them.
      @tagqueue = ''

      # Known single-entity/self-closing tags
      @self_closing_tags = %w(br hr img input meta)

      # Tags that can be immediately nested within themselves
      @nestable_tags = %w(blockquote div span font)

      # 1: tagname, with possible leading "/"
      # 2: attributes
      @tag_regex = /<(\/?\w*)\s*([^>]*)>/
    end

    # Returns a new String with balanced tags. Can be called repeatedly; the
    # stack and queue are reset on every call.
    def balance
      text = @text.dup
      newtext = ''

      @tagstack = []
      @tagqueue = ''

      # WP bug fix for comments - in case you REALLY meant to type '< !--'.
      # Park it behind a wider placeholder so the comment fix-up at the end
      # of this method does not turn it into a real comment opener.
      text.gsub!('< !--', '<    !--')

      # WP bug fix for LOVE <3 (and other situations with '<' before a
      # number): escape the '<' so it is not mistaken for the start of a tag.
      text.gsub!(/<([0-9]{1})/, '&lt;\1')

      while (match = @tag_regex.match(text))
        # Emit what the previous tag queued up, then clear the shifter.
        newtext << @tagqueue
        @tagqueue = ''

        name = match[1].downcase

        # Pop or Push
        replacement =
          if end_tag?(name)
            close_tag(name[1..-1])
          else
            open_tag(name, match[2])
          end

        # Text before the tag, then the (possibly empty) rewritten tag;
        # continue with whatever follows the tag.
        newtext << match.pre_match << replacement
        text = match.post_match
      end

      # Clear Tag Queue
      newtext << @tagqueue

      # Add Remaining text
      newtext << text

      # Empty Stack: close whatever is still open, innermost first.
      @tagstack.reverse_each do |open_name|
        newtext << "</#{open_name}>"
      end

      # WP fix for the bug with HTML comments: a comment goes through the
      # loop above as an empty tag name plus attributes and comes out as
      # '< !--'. Undo that, then restore the literal '< !--' parked earlier.
      newtext.gsub!('< !--', '<!--')
      newtext.gsub!('<    !--', '< !--')

      newtext
    end

    private

    # Handles a closing tag whose name is given without the leading "/".
    # Returns the text to emit in its place. Closing tags needed to reach a
    # match deeper in the stack are put on @tagqueue instead.
    def close_tag(name)
      if too_many_closing_tags?
        # Nothing is open, so the stray closing tag is dropped.
        # or close to be safe: "</#{name}>"
        ''
      elsif closing_tag?(name)
        # if stacktop value == tag close value then pop
        @tagstack.pop
        "</#{name}>"
      else
        # closing tag not at top, search for it; when found, close
        # everything opened since, innermost first. When it is not on the
        # stack at all, the closing tag is simply dropped.
        index = @tagstack.rindex(name)
        if index
          @tagstack.pop(@tagstack.size - index).reverse_each do |open_name|
            @tagqueue << "</#{open_name}>"
          end
        end
        ''
      end
    end

    # Handles an opening tag. Returns the text to emit in its place, or an
    # empty String when the tag had to be queued behind a closing tag.
    def open_tag(name, attributes)
      # Tag Cleaning: tags that already close themselves, and "tags" with an
      # empty name (e.g. comments), never touch the stack.
      unless self_closing_attributes?(attributes) || empty_tag?(name)
        if self_closing_tag?(name)
          # It's a known single-entity tag but it doesn't close itself, do so
          attributes += '/'
        else
          # If the top of the stack is the same as the tag we want to push,
          # close previous tag
          if !nestable?(name) && @tagstack.last == name
            @tagqueue = "</#{@tagstack.pop}>"
          end
          @tagstack.push(name)
        end
      end

      attributes = " #{attributes}" unless attributes.empty?
      tag = "<#{name}#{attributes}>"

      # If already queuing a close tag, then put this tag on, too, so that it
      # is emitted after the close tag rather than before it.
      return tag if @tagqueue.empty?

      @tagqueue << tag
      ''
    end

    # True when the captured tag name starts with "/".
    def end_tag?(string)
      string[0, 1] == "/"
    end

    # True when the given name is the innermost open tag.
    def closing_tag?(tag)
      @tagstack.last == tag
    end

    # True when no tag is open, i.e. there is nothing left to close.
    def too_many_closing_tags?
      @tagstack.empty?
    end

    # True when the attribute text ends in "/" (the tag closes itself).
    def self_closing_attributes?(attributes)
      attributes.end_with?('/')
    end

    def empty_tag?(tag)
      tag == ''
    end

    def self_closing_tag?(tag)
      @self_closing_tags.include?(tag)
    end

    def nestable?(tag)
      @nestable_tags.include?(tag)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Pipio
  # Converts timestamp strings into Time objects. Timestamps that carry only
  # a time of day are completed with the date given to the constructor.
  class TimeParser
    # A bare time of day, optionally with an AM/PM suffix.
    NO_DATE = /\A\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?\Z/

    # Format tried when Time.parse rejects the timestamp, e.g.
    # 01/22/2008 03:01:45 PM
    UNPARSEABLE_BY_DATETIME_PARSE = '%m/%d/%Y %I:%M:%S %P'

    # year, month, day - the date used for timestamps that have no date.
    def initialize(year, month, day)
      @fallback_date_string = [year, month, day].join('-')
    end

    # Returns a Time for the given timestamp, or nil when timestamp is nil
    # or false.
    def parse(timestamp)
      return unless timestamp

      dated =
        if has_no_date?(timestamp)
          "#{@fallback_date_string} #{timestamp}"
        else
          timestamp
        end

      parse_with_date(dated)
    end

    private

    # Parses with Time.parse, falling back to the explicit strptime format
    # when that raises ArgumentError.
    def parse_with_date(timestamp)
      Time.parse(timestamp)
    rescue ArgumentError
      Time.strptime(timestamp, UNPARSEABLE_BY_DATETIME_PARSE)
    end

    # Truthy when the timestamp, ignoring surrounding whitespace, is a bare
    # time of day.
    def has_no_date?(timestamp)
      timestamp.strip =~ NO_DATE
    end
  end
end
|
data/pipio.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "pipio/version"

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "pipio"
  gem.version  = Pipio::VERSION
  gem.authors  = ["Gabe Berke-Williams"]
  gem.email    = "gabe@thoughtbot.com"
  gem.homepage = "https://github.com/gabebw/pipio"
  gem.license  = "MIT"

  # The summary reuses the description text.
  gem.description = "A fast, easy way to parse Pidgin (gaim) logs"
  gem.summary     = gem.description

  # Packaged files come from whatever git tracks.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.require_paths = ["lib"]

  gem.required_ruby_version = Gem::Requirement.new(">= 1.9.2")

  # Development-only dependencies
  gem.add_development_dependency("mocha")
  gem.add_development_dependency("rspec", "~> 3.0")
  gem.add_development_dependency("rake")
  gem.add_development_dependency("simplecov")
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
describe Pipio::AliasRegistry do
  # Registry shared by the examples that do not build their own.
  let(:alias_registry) { Pipio::AliasRegistry.new('default') }

  it 'keeps track of aliases' do
    alias_registry['My Cool Alias'] = 'screen_name88'

    expect(alias_registry['My Cool Alias']).to eq('screen_name88')
  end

  it 'finds aliases even when they are queried with an action' do
    alias_registry['My Cool Alias'] = 'screen_name88'

    expect(alias_registry['***My Cool Alias']).to eq('screen_name88')
  end

  it 'downcases screen names' do
    alias_registry['alias'] = 'UPCASE'

    expect(alias_registry['alias']).to eq('upcase')
  end

  it 'removes space from screen names' do
    alias_registry['alias'] = 'a space'

    expect(alias_registry['alias']).to eq('aspace')
  end

  it 'takes a default' do
    registry = Pipio::AliasRegistry.new('default_name')

    expect(registry['alias']).to eq('default_name')
  end

  it 'normalizes the default' do
    registry = Pipio::AliasRegistry.new('DEFAULT NAME')

    expect(registry['alias']).to eq('defaultname')
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
describe Pipio::Chat do
  describe '#to_s' do
    it 'converts all Messages to strings and joins them' do
      expect(build_chat([:a, 1, 3]).to_s).to eq("a\n1\n3")
    end
  end

  describe '#messages' do
    it 'returns all messages' do
      expect(build_chat(%w(a b c)).messages).to eq %w(a b c)
    end
  end

  it 'is enumerable' do
    upcased = build_chat(%w(a b c)).map(&:upcase)

    expect(upcased).to eq(%w(A B C))
  end

  describe '#their_screen_name' do
    it 'is the screen name of the other person in the chat' do
      chat = build_chat([], their_screen_name: 'them')

      expect(chat.their_screen_name).to eq('them')
    end
  end

  describe '#my_screen_name' do
    it 'is my screen name' do
      chat = build_chat([], my_screen_name: 'me')

      expect(chat.my_screen_name).to eq('me')
    end
  end

  describe '#start_time_xmlschema' do
    it 'is the start time of the chat in xmlschema format' do
      started_at = Time.now
      chat = build_chat([], start_time: started_at)

      expect(chat.start_time_xmlschema).to eq(started_at.xmlschema)
    end
  end

  describe '#service' do
    it 'is the chat service' do
      chat = build_chat([], service: 'icq')

      expect(chat.service).to eq('icq')
    end
  end

  # Builds a Chat from the given messages and metadata overrides.
  def build_chat(messages, metadata_options = {})
    Pipio::Chat.new(messages, metadata(metadata_options))
  end

  # Builds a Metadata from default values, overridden by the given options.
  def metadata(options = {})
    defaults = {
      my_screen_name: 'me',
      their_screen_name: 'them',
      start_time: Time.now,
      service: 'aim'
    }

    Pipio::Metadata.new(defaults.merge(options))
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
describe Pipio::Cleaners::HtmlCleaner, ".clean" do
  it "removes html, body, and font tags" do
    expected = 'clean'
    input = %{<html><body><font color="red">#{expected}</font></body></html>}

    expect(clean(input)).to eq(expected)
  end

  it "removes those weird <FONT HSPACE> tags" do
    expected = 'clean'
    input = "</FONT HSPACE='2'>#{expected}"

    expect(clean(input)).to eq(expected)
  end

  it 'removes \r' do
    word = 'clean'
    input = [word, word, word].join("\r")

    expect(clean(input)).to eq(word * 3)
  end

  it "removes empty lines" do
    expected = 'clean'
    input = "#{expected}\n\n"

    expect(clean(input)).to eq(expected)
  end

  it "replaces newlines with <br/>" do
    expected = "<br/>clean"
    input = "\nclean"

    expect(clean(input)).to eq(expected)
  end

  it "removes empty links" do
    expected = 'clean' * 2
    input = '<a href="awesomelink"> </a>clean' +
      "<a href='awesomelink'></a>clean"

    expect(clean(input)).to eq(expected)
  end

  describe "with <span>s" do
    it "removes font-family" do
      expected = 'clean'
      input = %Q{<span style='font-family: Helvetica;'>#{expected}</span>}

      expect(clean(input)).to eq(expected)
    end

    it "removes font-size" do
      expected = 'clean'
      input = %Q{<span style="font-size: 6;">#{expected}</span>}

      expect(clean(input)).to eq(expected)
    end

    it "removes background" do
      expected = 'clean'
      input = %Q{<span style="background: #00afaf;">#{expected}</span>}

      expect(clean(input)).to eq(expected)
    end

    it "removes color=#00000" do
      expected = 'clean'
      input = %Q{<span style="color: #000000;">#{expected}</span>}

      expect(clean(input)).to eq(expected)
    end

    it "does not remove color that is not #00000" do
      input = %Q{<span style="color: #01ABcdef;">whatever</span>}

      expect(clean(input)).to eq(input)
    end

    it "removes improperly-formatted colors" do
      expected = 'clean'
      input = %Q{<span style="color: #0;">#{expected}</span>}

      expect(clean(input)).to eq(expected)
    end

    it "replaces <em> with italic font-style" do
      body = 'whatever'
      input = "<em>#{body}</em>"
      expected = %Q{<span style="font-style: italic;">#{body}</span>}

      expect(clean(input)).to eq(expected)
    end

    it "does not modify clean text" do
      expect(clean('clean')).to eq('clean')
    end

    # This implicitly tests a lot of other things, but they've been tested
    # before this too.
    it "removes a trailing space after style declaration and replaces double quotes" do
      opening_before = "<span style='color: #afaf00; font-size: 14pt; font-weight: bold; '>"
      # Replaced double quotes, removed space before ">"
      opening_after = '<span style="color: #afaf00;">'
      body = 'whatever'
      input = "#{opening_before}#{body}</span>"
      expected = "#{opening_after}#{body}</span>"

      expect(clean(input)).to eq(expected)
    end
  end

  # Runs the cleaner under test on a single line of markup.
  def clean(line)
    Pipio::Cleaners::HtmlCleaner.clean(line)
  end
end
|