pipio 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +27 -0
- data/.rspec +2 -0
- data/.simplecov +5 -0
- data/.travis.yml +12 -0
- data/Gemfile +3 -0
- data/LICENSE +20 -0
- data/NEWS.md +10 -0
- data/README.md +88 -0
- data/Rakefile +13 -0
- data/lib/pipio.rb +34 -0
- data/lib/pipio/alias_registry.rb +26 -0
- data/lib/pipio/chat.rb +39 -0
- data/lib/pipio/cleaners/html_cleaner.rb +95 -0
- data/lib/pipio/cleaners/text_cleaner.rb +15 -0
- data/lib/pipio/file_reader.rb +29 -0
- data/lib/pipio/message_creators/auto_or_xml_message_creator.rb +25 -0
- data/lib/pipio/message_creators/event_message_creator.rb +47 -0
- data/lib/pipio/message_creators/status_message_creator.rb +19 -0
- data/lib/pipio/messages/auto_reply_message.rb +7 -0
- data/lib/pipio/messages/event.rb +67 -0
- data/lib/pipio/messages/message.rb +23 -0
- data/lib/pipio/messages/status_message.rb +26 -0
- data/lib/pipio/messages/xml_message.rb +43 -0
- data/lib/pipio/metadata.rb +34 -0
- data/lib/pipio/metadata_parser.rb +55 -0
- data/lib/pipio/parser_factory.rb +32 -0
- data/lib/pipio/parsers/basic_parser.rb +83 -0
- data/lib/pipio/parsers/html_log_parser.rb +22 -0
- data/lib/pipio/parsers/null_parser.rb +9 -0
- data/lib/pipio/parsers/text_log_parser.rb +21 -0
- data/lib/pipio/tag_balancer.rb +163 -0
- data/lib/pipio/time_parser.rb +36 -0
- data/lib/pipio/version.rb +3 -0
- data/pipio.gemspec +27 -0
- data/spec/pipio/alias_registry_spec.rb +37 -0
- data/spec/pipio/chat_spec.rb +66 -0
- data/spec/pipio/cleaners/html_cleaner_spec.rb +102 -0
- data/spec/pipio/cleaners/text_cleaner_spec.rb +29 -0
- data/spec/pipio/file_reader_spec.rb +130 -0
- data/spec/pipio/messages/auto_reply_message_spec.rb +40 -0
- data/spec/pipio/messages/event_spec.rb +41 -0
- data/spec/pipio/messages/status_message_spec.rb +43 -0
- data/spec/pipio/messages/xml_message_spec.rb +55 -0
- data/spec/pipio/metadata_parser_spec.rb +81 -0
- data/spec/pipio/metadata_spec.rb +72 -0
- data/spec/pipio/parser_factory_spec.rb +31 -0
- data/spec/pipio/parsers/html_log_parser_spec.rb +160 -0
- data/spec/pipio/parsers/null_parser_spec.rb +13 -0
- data/spec/pipio/parsers/text_log_parser_spec.rb +37 -0
- data/spec/pipio/tag_balancer_spec.rb +16 -0
- data/spec/pipio/time_parser_spec.rb +66 -0
- data/spec/pipio_spec.rb +63 -0
- data/spec/spec_helper.rb +18 -0
- data/spec/support/chat_builder.rb +29 -0
- data/spec/support/chat_builder_helpers.rb +41 -0
- data/spec/support/file_builder.rb +22 -0
- data/spec/support/html_chat_builder.rb +67 -0
- data/spec/support/logfiles/2006-12-21.223606.txt +3 -0
- data/spec/support/logfiles/2008-01-15.071445-0500PST.htm +5 -0
- data/spec/support/logfiles/2008-01-15.071445-0500PST.html +5 -0
- data/spec/support/text_chat_builder.rb +21 -0
- data/spec/test-output/README.md +1 -0
- data/spec/test-output/html_log_output.xml +6 -0
- data/spec/test-output/text_log_output.xml +4 -0
- metadata +193 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
module Pipio
  # Parses an HTML chat log. All the work is delegated to a BasicParser that
  # is configured with the HTML-specific line patterns and the HTML cleaner.
  class HtmlLogParser
    # Matches the parenthesised timestamp of a log line. The leading
    # "YYYY-MM-DD " date and the trailing " AM"/" PM" are both optional.
    TIMESTAMP_REGEX = /\((?<timestamp>(?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)/

    # The auto-reply marker. Inside HTML the angle brackets are expected to be
    # entity-escaped, so both the escaped and the literal spelling are
    # accepted.
    AUTO_REPLY_MARKER = /<AUTO-REPLY>|&lt;AUTO-REPLY&gt;/

    # Matches a line in an HTML log file other than the first.
    # Named captures: timestamp, sn_or_alias, auto_reply (nil when absent), body.
    LINE_REGEX = /#{TIMESTAMP_REGEX} ?<b>(?<sn_or_alias>.+?) ?(?<auto_reply>#{AUTO_REPLY_MARKER})?:?<\/b> ?(?<body>.+)<br ?\/>/

    # Matches a status or event line.
    # Named captures: timestamp, body.
    LINE_REGEX_STATUS = /#{TIMESTAMP_REGEX} ?<b> (?<body>.+)<\/b><br ?\/>/

    # source_file_path:: path of the log file, handed to BasicParser as-is
    # user_aliases:: the user's aliases, handed to BasicParser as-is
    def initialize(source_file_path, user_aliases)
      @parser = BasicParser.new(source_file_path, user_aliases, LINE_REGEX,
        LINE_REGEX_STATUS, Cleaners::HtmlCleaner)
    end

    # Returns whatever the underlying BasicParser#parse returns.
    def parse
      @parser.parse
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Pipio
  # Parses a plain-text chat log. The class only assembles the text-specific
  # line patterns and cleaner; BasicParser does the actual parsing.
  class TextLogParser
    # Regex source (a String, interpolated below) for the "(H:M:S)" timestamp.
    TIMESTAMP_REGEX = '\((?<timestamp>\d{1,2}:\d{1,2}:\d{1,2})\)'

    # source_file_path:: path of the log file, handed to BasicParser as-is
    # user_aliases:: the user's aliases, handed to BasicParser as-is
    def initialize(source_file_path, user_aliases)
      @parser = BasicParser.new(
        source_file_path,
        user_aliases,
        message_line_pattern,
        status_line_pattern,
        Cleaners::TextCleaner
      )
    end

    # Returns whatever the underlying BasicParser#parse returns.
    def parse
      @parser.parse
    end

    private

    # Pattern for a line in a text log file other than the first.
    def message_line_pattern
      /#{TIMESTAMP_REGEX} (?<sn_or_alias>.*?) ?(?<auto_reply><AUTO-REPLY>)?: (?<body>.*)/o
    end

    # Pattern for a status or event line.
    def status_line_pattern
      /#{TIMESTAMP_REGEX} (?<body>[^:]+)/o
    end
  end
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
module Pipio
  # Balances the tags of a string using a modified stack: unclosed tags are
  # closed, stray closing tags are dropped and known single-entity tags are
  # made self-closing. #balance works on a copy, so the string passed to the
  # constructor is left untouched.
  #
  #   TagBalancer.new("<b>one<i>two</b>").balance # => "<b>one<i>two</i></b>"
  #
  # From Wordpress's formatting.php; rewritten in Ruby by Gabe
  # Berke-Williams, 2009.
  # Author:: Leonard Lin <leonard@acm.org>
  # License:: GPL v2.0
  # Copyright:: November 4, 2001
  class TagBalancer
    # text:: the markup to balance
    def initialize(text)
      @text = text

      @tagstack = []
      @tagqueue = ''

      # Known single-entity/self-closing tags
      @self_closing_tags = %w(br hr img input meta)

      # Tags that can be immediately nested within themselves
      @nestable_tags = %w(blockquote div span font)

      # 1: tagname, with possible leading "/"
      # 2: attributes
      @tag_regex = /<(\/?\w*)\s*([^>]*)>/
    end

    # Returns a new String holding the balanced markup.
    def balance
      text = @text.dup
      newtext = ''.dup

      @tagstack = []
      @tagqueue = ''.dup

      # WP bug fix for comments - in case you REALLY meant to type '< !--'.
      # Park it behind a placeholder so that the comment fix at the end of
      # this method does not turn it into a real comment opener.
      text.gsub!('< !--', '<    !--')

      # WP bug fix for LOVE <3 (and other situations with '<' before a
      # number): escape the '<' so that it is not read as the start of a tag.
      text.gsub!(/<([0-9]{1})/, '&lt;\1')

      while (match = @tag_regex.match(text))
        # Flush whatever the previous tag queued up.
        newtext << @tagqueue
        @tagqueue = ''.dup

        tag = match[1].downcase
        attributes = match[2]

        # Pop or Push
        emitted = if end_tag?(tag)
          close_tag(tag[1..-1])
        else
          open_tag(tag, attributes)
        end

        newtext << match.pre_match << emitted
        text = match.post_match
      end

      # Clear Tag Queue
      newtext << @tagqueue

      # Add Remaining text
      newtext << text

      # Empty Stack: close whatever is still open, innermost first
      @tagstack.reverse_each do |t|
        newtext << "</#{t}>"
      end

      # WP fix for the bug with HTML comments: re-emitting a tag puts a space
      # after '<', so undo that for comments, then restore the placeholder.
      newtext.gsub!("< !--", "<!--")
      newtext.gsub!("<    !--", "< !--")

      newtext
    end

    private

    # Handles a closing tag (name given without the leading "/"). Returns the
    # text to emit in place and may queue closing tags in @tagqueue.
    def close_tag(name)
      # Nothing is open, so this closing tag is dropped.
      return '' if too_many_closing_tags?

      if closing_tag?(name)
        # Top of the stack matches: pop it and emit the closing tag.
        @tagstack.pop
        return "</#{name}>"
      end

      # Closing tag not at top: search for it from the top down and close
      # everything opened since. An unknown tag closes nothing.
      depth = @tagstack.rindex(name)
      if depth
        (@tagstack.size - depth).times do
          @tagqueue << "</#{@tagstack.pop}>"
        end
      end
      ''
    end

    # Handles an opening tag. Returns the text to emit in place, or '' when
    # the tag was appended to @tagqueue instead.
    def open_tag(name, attributes)
      if self_closing_attributes?(attributes) || empty_tag?(name)
        # Already self-closed, or not a real tag: nothing to track.
      elsif self_closing_tag?(name)
        # A known single-entity tag that doesn't close itself: do so.
        attributes = "#{attributes}/"
      else
        # If the top of the stack is the same as the tag we want to push,
        # close the previous tag first (unless the tag may nest in itself).
        if !@tagstack.empty? && !nestable?(name) && closing_tag?(name)
          @tagqueue = "</#{@tagstack.pop}>"
        end
        @tagstack.push(name)
      end

      attributes = " #{attributes}" unless attributes.empty?
      markup = "<#{name}#{attributes}>"

      # If already queuing a close tag, then put this tag on, too. (An empty
      # String is truthy in Ruby, hence the explicit empty? check.)
      return markup if @tagqueue.empty?

      @tagqueue << markup
      ''
    end

    def end_tag?(string)
      string[0,1] == "/"
    end

    # True when the tag on top of the stack is +tag+.
    def closing_tag?(tag)
      @tagstack.last == tag
    end

    def too_many_closing_tags?
      @tagstack.empty?
    end

    def self_closing_attributes?(attributes)
      attributes[-1,1] == '/'
    end

    def empty_tag?(tag)
      tag == ''
    end

    def self_closing_tag?(tag)
      @self_closing_tags.include?(tag)
    end

    def nestable?(tag)
      @nestable_tags.include?(tag)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Pipio
  # Turns the timestamp strings found in chat logs into Time objects. Log
  # lines that carry only a time of day are completed with the date given to
  # the constructor.
  class TimeParser
    # A bare time of day such as "7:14:45" or "07:14:45 PM".
    NO_DATE = /\A\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?\Z/

    # strptime format used when Time.parse rejects the string,
    # e.g. 01/22/2008 03:01:45 PM
    UNPARSEABLE_BY_DATETIME_PARSE = '%m/%d/%Y %I:%M:%S %P'

    # year, month, day:: the date used for timestamps that have no date part
    def initialize(year, month, day)
      @fallback_date_string = format('%s-%s-%s', year, month, day)
    end

    # Returns a Time for +timestamp+, or nil when +timestamp+ is nil/false.
    def parse(timestamp)
      return unless timestamp

      dated = has_no_date?(timestamp) ? "#{@fallback_date_string} #{timestamp}" : timestamp
      parse_with_date(dated)
    end

    private

    # Tries Time.parse first; falls back to the explicit strptime format when
    # Time.parse raises ArgumentError.
    def parse_with_date(timestamp)
      Time.parse(timestamp)
    rescue ArgumentError
      Time.strptime(timestamp, UNPARSEABLE_BY_DATETIME_PARSE)
    end

    # Truthy when the (stripped) timestamp is only a time of day.
    def has_no_date?(timestamp)
      NO_DATE =~ timestamp.strip
    end
  end
end
|
data/pipio.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for pipio.

# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "pipio/version"

Gem::Specification.new do |spec|
  spec.name          = "pipio"
  spec.version       = Pipio::VERSION
  spec.authors       = ["Gabe Berke-Williams"]
  spec.email         = "gabe@thoughtbot.com"
  spec.description   = "A fast, easy way to parse Pidgin (gaim) logs"
  spec.summary       = spec.description
  spec.homepage      = "https://github.com/gabebw/pipio"
  spec.license       = "MIT"

  # Ask git for the tracked files once and derive the other lists from that
  # result. Filtering in Ruby avoids relying on brace expansion in whatever
  # shell the backticks happen to run.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.required_ruby_version = Gem::Requirement.new(">= 1.9.2")

  spec.add_development_dependency("mocha")
  spec.add_development_dependency("rspec", "~> 3.0")
  spec.add_development_dependency("rake")
  spec.add_development_dependency("simplecov")
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Specs for Pipio::AliasRegistry: alias lookup and screen-name normalization.
describe Pipio::AliasRegistry do
  # A registry with the default screen name 'default', built once per example.
  let(:alias_registry) { Pipio::AliasRegistry.new('default') }

  it 'keeps track of aliases' do
    alias_registry['My Cool Alias'] = 'screen_name88'

    expect(alias_registry['My Cool Alias']).to eq('screen_name88')
  end

  it 'finds aliases even when they are queried with an action' do
    alias_registry['My Cool Alias'] = 'screen_name88'

    # The leading "***" is how the alias is queried "with an action".
    expect(alias_registry['***My Cool Alias']).to eq('screen_name88')
  end

  it 'downcases screen names' do
    alias_registry['alias'] = 'UPCASE'

    expect(alias_registry['alias']).to eq('upcase')
  end

  it 'removes space from screen names' do
    alias_registry['alias'] = 'a space'

    expect(alias_registry['alias']).to eq('aspace')
  end

  it 'takes a default' do
    registry = Pipio::AliasRegistry.new('default_name')

    expect(registry['alias']).to eq('default_name')
  end

  it 'normalizes the default' do
    registry = Pipio::AliasRegistry.new('DEFAULT NAME')

    expect(registry['alias']).to eq('defaultname')
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# Specs for Pipio::Chat: message access, enumeration and the values it
# exposes from its metadata.
describe Pipio::Chat do
  describe '#to_s' do
    it 'converts all Messages to strings and joins them' do
      messages = [:a, 1, 3]

      expect(Pipio::Chat.new(messages, metadata).to_s).to eq("a\n1\n3")
    end
  end

  describe '#messages' do
    it 'returns all messages' do
      messages = %w(a b c)

      expect(Pipio::Chat.new(messages, metadata).messages).to eq %w(a b c)
    end
  end

  it 'is enumerable' do
    upcased = Pipio::Chat.new(%w(a b c), metadata).map(&:upcase)

    expect(upcased).to eq(%w(A B C))
  end

  describe '#their_screen_name' do
    it 'is the screen name of the other person in the chat' do
      chat_metadata = metadata(their_screen_name: 'them')

      expect(Pipio::Chat.new([], chat_metadata).their_screen_name).to eq('them')
    end
  end

  describe '#my_screen_name' do
    it 'is my screen name' do
      chat_metadata = metadata(my_screen_name: 'me')

      expect(Pipio::Chat.new([], chat_metadata).my_screen_name).to eq('me')
    end
  end

  describe '#start_time_xmlschema' do
    it 'is the start time of the chat in xmlschema format' do
      started_at = Time.now
      chat_metadata = metadata(start_time: started_at)

      expect(Pipio::Chat.new([], chat_metadata).start_time_xmlschema).to eq(started_at.xmlschema)
    end
  end

  describe '#service' do
    it 'is the chat service' do
      chat_metadata = metadata(service: 'icq')

      expect(Pipio::Chat.new([], chat_metadata).service).to eq('icq')
    end
  end

  # Builds a Pipio::Metadata from default values; +options+ overrides
  # individual keys.
  def metadata(options = {})
    defaults = {
      my_screen_name: 'me',
      their_screen_name: 'them',
      start_time: Time.now,
      service: 'aim'
    }

    Pipio::Metadata.new(defaults.merge(options))
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# Specs for Pipio::Cleaners::HtmlCleaner.clean. Each example feeds one kind
# of "dirty" markup through the cleaner and compares the result.
describe Pipio::Cleaners::HtmlCleaner, ".clean" do
  it "removes html, body, and font tags" do
    dirty = %{<html><body><font color="red">clean</font></body></html>}

    expect(clean(dirty)).to eq('clean')
  end

  it "removes those weird <FONT HSPACE> tags" do
    dirty = "</FONT HSPACE='2'>clean"

    expect(clean(dirty)).to eq('clean')
  end

  it 'removes \r' do
    dirty = (['clean'] * 3).join("\r")

    expect(clean(dirty)).to eq('clean' * 3)
  end

  it "removes empty lines" do
    expect(clean("clean\n\n")).to eq('clean')
  end

  it "replaces newlines with <br/>" do
    expect(clean("\nclean")).to eq("<br/>clean")
  end

  it "removes empty links" do
    whitespace_link = '<a href="awesomelink"> </a>clean'
    empty_link = "<a href='awesomelink'></a>clean"

    expect(clean(whitespace_link + empty_link)).to eq('clean' * 2)
  end

  describe "with <span>s" do
    it "removes font-family" do
      dirty = %Q{<span style='font-family: Helvetica;'>clean</span>}

      expect(clean(dirty)).to eq('clean')
    end

    it "removes font-size" do
      dirty = %Q{<span style="font-size: 6;">clean</span>}

      expect(clean(dirty)).to eq('clean')
    end

    it "removes background" do
      dirty = %Q{<span style="background: #00afaf;">clean</span>}

      expect(clean(dirty)).to eq('clean')
    end

    it "removes color=#00000" do
      dirty = %Q{<span style="color: #000000;">clean</span>}

      expect(clean(dirty)).to eq('clean')
    end

    it "does not remove color that is not #00000" do
      untouched = %Q{<span style="color: #01ABcdef;">whatever</span>}

      expect(clean(untouched)).to eq(untouched)
    end

    it "removes improperly-formatted colors" do
      dirty = %Q{<span style="color: #0;">clean</span>}

      expect(clean(dirty)).to eq('clean')
    end

    it "replaces <em> with italic font-style" do
      expected = %Q{<span style="font-style: italic;">whatever</span>}

      expect(clean("<em>whatever</em>")).to eq(expected)
    end

    it "does not modify clean text" do
      expect(clean('clean')).to eq('clean')
    end

    # This implicitly tests a lot of other things, but they've been tested
    # before this too.
    it "removes a trailing space after style declaration and replaces double quotes" do
      # Input: single-quoted style attribute with a space before the closing quote.
      dirty_open = "<span style='color: #afaf00; font-size: 14pt; font-weight: bold; '>"
      # Expected: double-quoted attribute holding only the color declaration.
      clean_open = '<span style="color: #afaf00;">'

      expect(clean("#{dirty_open}whatever</span>")).to eq("#{clean_open}whatever</span>")
    end
  end

  # Shorthand for running +line+ through the cleaner under test.
  def clean(line)
    Pipio::Cleaners::HtmlCleaner.clean(line)
  end
end
|