fdlint 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +8 -0
- data/Gemfile.lock +14 -0
- data/README.md +68 -0
- data/Rakefile +92 -0
- data/bin/fdlint +17 -0
- data/lib/base_parser.rb +143 -0
- data/lib/cmd_runner.rb +145 -0
- data/lib/context.rb +31 -0
- data/lib/css/parser.rb +186 -0
- data/lib/css/reader.rb +30 -0
- data/lib/css/rule/check_compression_rule.rb +48 -0
- data/lib/css/rule/checklist.rb +45 -0
- data/lib/css/struct.rb +111 -0
- data/lib/encoding_error.rb +6 -0
- data/lib/file_validator.rb +38 -0
- data/lib/helper/code_type.rb +50 -0
- data/lib/helper/color_string.rb +44 -0
- data/lib/helper/file_reader.rb +22 -0
- data/lib/helper/strenc.rb +65 -0
- data/lib/html/parser.rb +212 -0
- data/lib/html/query.rb +96 -0
- data/lib/html/rule/check_tag_rule.rb +80 -0
- data/lib/html/struct.rb +291 -0
- data/lib/js/expr/expr.rb +66 -0
- data/lib/js/expr/left_hand.rb +63 -0
- data/lib/js/expr/operate.rb +92 -0
- data/lib/js/expr/primary.rb +166 -0
- data/lib/js/parser.rb +116 -0
- data/lib/js/rule/all.rb +35 -0
- data/lib/js/rule/checklist.rb +41 -0
- data/lib/js/rule/file_checker.rb +42 -0
- data/lib/js/rule/helper.rb +96 -0
- data/lib/js/rule/no_global.rb +87 -0
- data/lib/js/stat/if.rb +25 -0
- data/lib/js/stat/iter.rb +85 -0
- data/lib/js/stat/stat.rb +117 -0
- data/lib/js/stat/switch.rb +65 -0
- data/lib/js/stat/try.rb +28 -0
- data/lib/js/stat/var.rb +40 -0
- data/lib/js/struct.rb +248 -0
- data/lib/log_entry.rb +49 -0
- data/lib/node.rb +28 -0
- data/lib/parse_error.rb +13 -0
- data/lib/parser_visitable.rb +138 -0
- data/lib/position_info.rb +46 -0
- data/lib/printer/base_printer.rb +24 -0
- data/lib/printer/console_printer.rb +66 -0
- data/lib/printer/nocolor_printer.rb +27 -0
- data/lib/printer/vim_printer.rb +19 -0
- data/lib/rule.rb +241 -0
- data/lib/rule_helper.rb +14 -0
- data/lib/runner.rb +225 -0
- data/rules.d/css.rule +127 -0
- data/rules.d/html.dtd.rule +22 -0
- data/rules.d/html.prop.rule +51 -0
- data/rules.d/html.tag.rule +136 -0
- data/rules.d/js.file.rule +13 -0
- data/rules.d/js.jquery.rule +56 -0
- data/rules.d/js.mergefile.rule +71 -0
- data/rules.d/js.rule +84 -0
- data/test/all_tests.rb +84 -0
- data/test/cli/cli_test.rb +70 -0
- data/test/cli/log_level_test.rb +51 -0
- data/test/cli/output_format_test.rb +47 -0
- data/test/cli/type_test.rb +77 -0
- data/test/css/mac_line_end_support_test.rb +38 -0
- data/test/css/parser_test.rb +276 -0
- data/test/css/rule/check_encoding_test.rb +66 -0
- data/test/css/rule/check_list_rule_test.rb +167 -0
- data/test/css/rule/compression_test.rb +53 -0
- data/test/css/rule/file_name_test.rb +76 -0
- data/test/fixtures/css/broken.css +4 -0
- data/test/fixtures/css/cbu/36.css +52 -0
- data/test/fixtures/css/cbu/china_top.css +324 -0
- data/test/fixtures/css/cbu/default-merge.css +3 -0
- data/test/fixtures/css/cbu/default.css +13 -0
- data/test/fixtures/css/cbu/diy-merge.css +25 -0
- data/test/fixtures/css/cbu/fns-v1.css +27 -0
- data/test/fixtures/css/cbu/index_v0.1.css +12 -0
- data/test/fixtures/css/cbu/merge.css +11 -0
- data/test/fixtures/css/cbu/min.css +2 -0
- data/test/fixtures/css/cbu/my_home_admin.css +126 -0
- data/test/fixtures/css/cbu/nav.css +95 -0
- data/test/fixtures/css/cbu/pic_list.css +386 -0
- data/test/fixtures/css/cbu/quote-edit.css +18 -0
- data/test/fixtures/css/cbu/selloffer.shopwindow.css +1 -0
- data/test/fixtures/css/cbu/v1.css +9 -0
- data/test/fixtures/css/css3.css +30 -0
- data/test/fixtures/css/empty-min.css +0 -0
- data/test/fixtures/css/empty.css +0 -0
- data/test/fixtures/css/font-family.css +4 -0
- data/test/fixtures/css/gb-good.css +14 -0
- data/test/fixtures/css/gb_using_star.css +4 -0
- data/test/fixtures/css/import.css +18 -0
- data/test/fixtures/css/mac-line-sep-err-min.css +1 -0
- data/test/fixtures/css/mac-line-sep-err.css +1 -0
- data/test/fixtures/css/mac-line-sep-good-min.css +1 -0
- data/test/fixtures/css/mac-line-sep-good.css +1 -0
- data/test/fixtures/css/multi-encoding-in-a-file.css +0 -0
- data/test/fixtures/css/simple.css +1 -0
- data/test/fixtures/css/using_expr.css +8 -0
- data/test/fixtures/css/using_hack.css +21 -0
- data/test/fixtures/css/using_id.css +1 -0
- data/test/fixtures/css/using_star.css +4 -0
- data/test/fixtures/css/utf8_good.css +6 -0
- data/test/fixtures/css/utf8_good_declaring_charset.css +7 -0
- data/test/fixtures/css/utf8_using_star.css +5 -0
- data/test/fixtures/html/1-1.html +120 -0
- data/test/fixtures/html/1-2.html +120 -0
- data/test/fixtures/html/cms.html +373 -0
- data/test/fixtures/html/css_out_of_head.html +9 -0
- data/test/fixtures/html/fdev-template.html +22 -0
- data/test/fixtures/html/google.com.html +33 -0
- data/test/fixtures/html/mixed_log_levels.html +4 -0
- data/test/fixtures/html/mixed_types.html +13 -0
- data/test/fixtures/html/no_dtd.html +6 -0
- data/test/fixtures/html/readme.html +94 -0
- data/test/fixtures/html/review.board.html +163 -0
- data/test/fixtures/html/syntax_err.html +3 -0
- data/test/fixtures/html/train/detail/345/233/276/346/226/207/347/273/223/345/220/210.html +208 -0
- data/test/fixtures/html/train/detail/347/232/204Flash.html +212 -0
- data/test/fixtures/html/train/detail/347/232/204Vedio.html +212 -0
- data/test/fixtures/html/train/index.html +37 -0
- data/test/fixtures/html/train/test.html +1 -0
- data/test/fixtures/html/train//344/277/256/346/224/271/344/270/200/347/272/247/345/210/206/347/261/273.html +112 -0
- data/test/fixtures/html/train//344/277/256/346/224/271/345/255/220/345/210/206/347/261/273.html +108 -0
- data/test/fixtures/html/train//344/277/256/346/224/271/350/257/276/347/250/213.html +195 -0
- data/test/fixtures/html/train//345/215/232/345/256/242/350/256/276/347/275/256.html +142 -0
- data/test/fixtures/html/train//346/265/217/350/247/210/350/256/260/345/275/225.html +191 -0
- data/test/fixtures/html/train//346/267/273/345/212/240/344/270/200/347/272/247/345/210/206/347/261/273.html +113 -0
- data/test/fixtures/html/train//346/267/273/345/212/240/345/255/220/345/210/206/347/261/273.html +112 -0
- data/test/fixtures/html/train//346/267/273/345/212/240/350/257/276/347/250/213.html +195 -0
- data/test/fixtures/html/train//347/231/273/345/275/225.html +20 -0
- data/test/fixtures/html/train//347/256/241/347/220/206/345/210/206/347/261/273.html +210 -0
- data/test/fixtures/html/train//347/256/241/347/220/206/345/217/215/351/246/210.html +222 -0
- data/test/fixtures/html/train//347/256/241/347/220/206/350/257/276/347/250/213.html +284 -0
- data/test/fixtures/html/train//347/256/241/347/220/206/350/264/246/346/210/267.html +107 -0
- data/test/fixtures/html/train//347/275/221/344/270/212/345/237/271/350/256/255home/351/241/265.html +354 -0
- data/test/fixtures/html/train//347/275/221/345/225/206/345/237/271/350/256/255list/351/241/265.html +255 -0
- data/test/fixtures/html/train//350/256/276/347/275/256/351/246/226/351/241/265/346/216/250/350/215/220.html +168 -0
- data/test/fixtures/html/train//350/257/264/346/230/216.txt +3 -0
- data/test/fixtures/html/train//351/246/226/351/241/265/345/271/277/345/221/212/350/256/276/347/275/256.html +297 -0
- data/test/fixtures/html/unescaped.html +2 -0
- data/test/fixtures/html/view.vm +916 -0
- data/test/fixtures/js/jquery-1.7.js +9300 -0
- data/test/fixtures/js/scope-test.js +22 -0
- data/test/helper.rb +41 -0
- data/test/html/mixed_type_test.rb +35 -0
- data/test/html/parser/parse_comment_test.rb +47 -0
- data/test/html/parser/parse_dtd_test.rb +46 -0
- data/test/html/parser/parse_script_tag_test.rb +55 -0
- data/test/html/parser/parse_with_auto_close_tag_test.rb +41 -0
- data/test/html/parser/parse_with_diff_case_test.rb +38 -0
- data/test/html/parser/parse_with_emtpy_test.rb +22 -0
- data/test/html/parser/parse_with_multi_children_test.rb +27 -0
- data/test/html/parser/parse_with_multi_line_test.rb +41 -0
- data/test/html/parser/parse_with_prop_test.rb +88 -0
- data/test/html/parser/parse_with_script_tag_test.rb +26 -0
- data/test/html/parser/parse_with_selfclosing_test.rb +39 -0
- data/test/html/parser/parse_with_simple_tag_test.rb +44 -0
- data/test/html/parser/parse_with_simple_tree_test.rb +40 -0
- data/test/html/parser/parse_with_style_tag_test.rb +22 -0
- data/test/html/parser/parse_with_text_test.rb +45 -0
- data/test/html/parser_test.rb +52 -0
- data/test/html/query_test.rb +52 -0
- data/test/html/rule/check_block_level_element_test.rb +52 -0
- data/test/html/rule/check_button_test.rb +45 -0
- data/test/html/rule/check_class_count_test.rb +36 -0
- data/test/html/rule/check_css_in_head_test.rb +53 -0
- data/test/html/rule/check_dtd_test.rb +46 -0
- data/test/html/rule/check_form_element_name_test.rb +49 -0
- data/test/html/rule/check_head_contain_meta_and_title_test.rb +52 -0
- data/test/html/rule/check_html_template_test.rb +103 -0
- data/test/html/rule/check_hyperlink_with_target_test.rb +40 -0
- data/test/html/rule/check_hyperlink_with_title_test.rb +43 -0
- data/test/html/rule/check_id_n_class_downcase_test.rb +40 -0
- data/test/html/rule/check_img_with_alt_prop_test.rb +33 -0
- data/test/html/rule/check_no_import_css_test.rb +36 -0
- data/test/html/rule/check_prop_have_value_test.rb +32 -0
- data/test/html/rule/check_prop_seperator_test.rb +32 -0
- data/test/html/rule/check_style_prop_test.rb +30 -0
- data/test/html/rule/check_tag_closed_test.rb +59 -0
- data/test/html/rule/check_tag_downcase_test.rb +51 -0
- data/test/html/rule/check_unescape_char_test.rb +35 -0
- data/test/html/rule/check_unique_import_test.rb +56 -0
- data/test/html/rule_test.rb +62 -0
- data/test/js/expr/expr.rb +57 -0
- data/test/js/expr/left_hand.rb +25 -0
- data/test/js/expr/operate.rb +145 -0
- data/test/js/expr/primary.rb +89 -0
- data/test/js/parser_test.rb +98 -0
- data/test/js/rule/alert_check_test.rb +37 -0
- data/test/js/rule/all_test.rb +23 -0
- data/test/js/rule/base_test.rb +34 -0
- data/test/js/rule/file_checker_test.rb +131 -0
- data/test/js/rule/jq_check_test.rb +90 -0
- data/test/js/rule/nest_try_catch_test.rb +71 -0
- data/test/js/rule/new_object_and_new_array_test.rb +38 -0
- data/test/js/rule/no_eval_test.rb +34 -0
- data/test/js/rule/no_global_test.rb +88 -0
- data/test/js/rule/private_method_check_test.rb +58 -0
- data/test/js/rule/semicolon_test.rb +63 -0
- data/test/js/rule/stat_if_with_brace_test.rb +68 -0
- data/test/js/rule/stat_if_with_muti_else_test.rb +68 -0
- data/test/js/rule/use_strict_equal_test.rb +44 -0
- data/test/js/rule_test.rb +47 -0
- data/test/js/stat/if.rb +26 -0
- data/test/js/stat/iter.rb +115 -0
- data/test/js/stat/stat.rb +91 -0
- data/test/js/stat/switch.rb +37 -0
- data/test/js/stat/try.rb +32 -0
- data/test/js/stat/var.rb +38 -0
- data/test/parser_visitable_test.rb +102 -0
- data/test/position_info_test.rb +66 -0
- data/test/rule_dsl/dsl_basic_test.rb +91 -0
- data/test/rule_dsl/importing_test.rb +48 -0
- data/test/runner/log_level_test.rb +58 -0
- metadata +317 -0
data/lib/css/reader.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require_relative '../encoding_error'
|
2
|
+
require_relative '../helper/file_reader'
|
3
|
+
|
4
|
+
module XRay
|
5
|
+
module CSS
|
6
|
+
|
7
|
+
# Reads a CSS file and checks that the charset it declares (if any)
# agrees with the encoding detected while reading it.
class Reader

  # Mixed in so the FileReader constant used below resolves from here.
  include XRay::Helper

  # Reads +file+ and returns its source text.
  #
  # file - path of the CSS file to read.
  # opt  - accepted for interface compatibility; not used by this method.
  #
  # Raises EncodingError when the first line declares a charset that
  # differs from the detected encoding. Charset labels are compared
  # case-insensitively, so <tt>@charset "UTF-8"</tt> agrees with a
  # detected 'utf-8'.
  def self.read( file, opt = {} )
    source, enc = FileReader::readfile(file)
    declare = get_encoding_declaration(file)
    if declare and enc.to_s.downcase != declare.downcase
      raise EncodingError.new
    end
    source
  end

  # Returns the charset named by an @charset rule on the first line of
  # +file+, or nil when there is none. Any error raised while opening,
  # reading or matching (for example an empty file) also yields nil.
  def self.get_encoding_declaration( file )
    File.open(file, &:readline)[/@charset\s+(['"])(.*?)\1/, 2]
  rescue
    nil
  end
end
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative '../../log_entry'
|
3
|
+
|
4
|
+
module XRay
|
5
|
+
module CSS
|
6
|
+
module Rule
|
7
|
+
|
8
|
+
# File-level check that a CSS file has a minified sibling
# (a.css -> a-min.css) in the same folder.
class CompressionChecker

  # opt - options hash; a shallow copy is stored.
  def initialize( opt={} )
    @opt = opt.dup
  end

  # Runs every file-level check against the file called +name+.
  #
  # Returns an Array of log entries (empty when nothing is wrong).
  def check_file( name )
    check_items([
      :has_minified_in_same_folder
    ], name)
  end

  # Returns a one-element Array holding an error entry when +name+ is
  # neither a minified file nor a merge file and no "-min.css" sibling
  # exists; returns nil otherwise.
  def check_has_minified_in_same_folder( name )
    return if is_min_file?(name) or is_merge_file?(name)
    return if File.exist?( name.sub(/\.css$/, '-min.css') )

    [LogEntry.new('发布上线的文件需要压缩,命名规则如a.js->a-min.js,且两者在同一目录下', :error)]
  end

  protected

  # Calls check_<item>(name) for each item and gathers the Array results;
  # non-Array results (such as nil) are ignored.
  def check_items( items, name )
    items.each_with_object([]) do |item, results|
      outcome = self.send(:"check_#{item}", name)
      results.concat outcome if Array === outcome
    end
  end

  # Truthy when the file name ends with "merge.css".
  def is_merge_file?( name )
    name =~ /merge\.css$/
  end

  # Truthy when the file is named "min.css" or ends with a delimited
  # "min.css" suffix ("a-min.css", "a.min.css", "a_min.css").
  # The delimiter is required so that names that merely end with the
  # letters "min" (e.g. "admin.css") are not mistaken for minified files.
  def is_min_file?( name )
    name =~ /(?:\A|[\/\\._-])min\.css$/
  end

end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require_relative '../../rule'
|
4
|
+
require_relative '../../context'
|
5
|
+
|
6
|
+
module XRay
|
7
|
+
module CSS
|
8
|
+
module Rule
|
9
|
+
|
10
|
+
# Visitor used by the CSS check list: each visit_* hook hands the visited
# node to the matching check_* method. The check_* methods are not defined
# in this class (presumably supplied by the included modules - confirm).
class CheckListRule

  include ::XRay::Rule, Context

  attr_reader :options

  # options - configuration hash, exposed unchanged through #options.
  def initialize(options = {})
    @options = options
  end

  # Hook for simple selectors.
  def visit_simple_selector(selector)
    check_css_selector(selector)
  end

  # Hook for declarations.
  def visit_declaration(dec)
    check_declaration(dec)
  end

  # Hook for rule sets.
  def visit_ruleset(ruleset)
    check_css_ruleset(ruleset)
  end

  # Hook for property names.
  def visit_property(property)
    check_css_property(property)
  end

  # Hook for property values.
  def visit_value(value)
    check_css_value(value)
  end

end
|
42
|
+
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/lib/css/struct.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
require_relative '../node'
|
2
|
+
|
3
|
+
module XRay
|
4
|
+
module CSS
|
5
|
+
Node = XRay::Node
|
6
|
+
|
7
|
+
# Root node of a parsed style sheet: an ordered list of statements
# (directives and rule sets).
class StyleSheet < Node
  attr_reader :statements

  # statements - collection of Directive and RuleSet nodes.
  def initialize(statements)
    @statements = statements
  end

  # Text of every rule set, one per line. Directives are not included.
  def text
    rulesets.map(&:text).join("\n")
  end

  # Position of the first rule set, or nil when there is none.
  def position
    leading = rulesets.first
    leading ? leading.position : nil
  end

  # The statements that are Directive nodes.
  def directives
    statements.select { |stmt| stmt.kind_of?(Directive) }
  end

  # The statements that are RuleSet nodes.
  def rulesets
    statements.select { |stmt| stmt.kind_of?(RuleSet) }
  end

  alias_method :at_rules, :directives
end
|
32
|
+
|
33
|
+
# An at-rule: "@keyword", an optional expression and an optional block.
class Directive < Node
  attr_reader :keyword, :expression, :block

  def initialize(keyword, expression, block = nil)
    @keyword = keyword
    @expression = expression
    @block = block
  end

  # Renders the directive: "@keyword" immediately followed by the
  # expression (when present), then either the block wrapped in braces
  # or a terminating semicolon.
  def text
    pieces = ["@#{keyword}"]
    pieces << "#{expression}" if expression
    pieces << (block ? "{\n#{block}\n}\n" : ';')
    pieces.join
  end

  # Position of the keyword.
  def position
    keyword.position
  end
end
|
57
|
+
|
58
|
+
# A selector together with its list of declarations.
class RuleSet < Node
  attr_reader :selector, :declarations

  def initialize(selector, declarations)
    @selector = selector
    @declarations = declarations
  end

  # Renders "selector {", one declaration per line indented by four
  # spaces and terminated with ";", then the closing brace.
  def text
    indent = ' ' * 4
    body = declarations.map { |dec| "#{indent}#{dec};" }.join("\n")

    "#{selector} {\n#{body}\n}"
  end

  # Position of the selector.
  def position
    selector.position
  end
end
|
77
|
+
|
78
|
+
# A selector group made of simple selectors.
class Selector < Node
  attr_reader :simple_selectors

  def initialize(simple_selectors)
    @simple_selectors = simple_selectors
  end

  # The simple selectors' texts joined with ", ".
  def text
    @simple_selectors.map(&:text).join(', ')
  end

  # Position of the first simple selector, or nil when the list is empty.
  def position
    return nil if simple_selectors.empty?

    simple_selectors[0].position
  end
end
|
93
|
+
|
94
|
+
# A single "property: value" pair.
class Declaration < Node
  attr_reader :property, :value

  def initialize(property, value)
    @property = property
    @value = value
  end

  # Renders the pair as "property: value".
  def text
    "#{property}: #{value}"
  end

  # Position of the property.
  def position
    property.position
  end
end
|
109
|
+
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module XRay
|
2
|
+
|
3
|
+
# Runs a list of validators against a file and gathers their results.
class FileValidator
  # options - stored as given; not read by this class itself.
  def initialize( options )
    @options = options
    @validators = []
  end

  # Registers a single validator.
  def add_validator( val )
    @validators.push(val)
  end

  # Registers each validator in +vals+.
  def add_validators( vals)
    vals.each do |val|
      add_validator(val)
    end
  end

  # Calls check_file(file) on every registered validator that responds to
  # it. Array results are appended element by element, any other truthy
  # result is appended as a single element, nil/false results are skipped.
  #
  # Returns the Array of gathered results.
  def check( file )
    @validators.each_with_object([]) do |validator, collected|
      next unless validator.respond_to?(:check_file)

      outcome = validator.check_file(file)
      next unless outcome

      if outcome.is_a?(Array)
        collected.concat(outcome)
      else
        collected << outcome
      end
    end
  end

  alias validate check

end
|
37
|
+
|
38
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# Guesses whether a piece of source is CSS, JavaScript or HTML, either
# from the file name or from the content itself.
class CodeType

  class << self

    public

    # Returns :css, :js or :html. The file name decides when one is given
    # (non-nil and non-empty); otherwise the content is inspected.
    def guess(text, filename=nil)
      if filename && !filename.empty?
        guess_by_name filename
      else
        guess_by_content text
      end
    end

    # Classifies by extension: ".css" -> :css, ".js" -> :js (both
    # case-insensitive), anything else -> :html.
    def guess_by_name( filename )
      case File.extname( filename )
      when /\.css$/i
        :css
      when /\.js$/i
        :js
      else
        :html #TODO: support more suffix
      end
    end

    # Classifies by content: HTML is tried first, then CSS; :js is the
    # fallback.
    def guess_by_content(text)
      return :html if is_html? text
      return :css if is_css? text
      :js #TODO: support more code syntaxes
    end

    # Truthy when the extension ends with css, js, htm or html
    # (case-insensitive).
    def is_style_file?(filename)
      File.extname( filename ) =~ /(css|js|html?)$/i
    end

    # 'lib' when the path contains a "lib" directory component
    # (either slash style), 'page' otherwise.
    def scope(filename)
      filename =~ /[\\\/]lib[\\\/]/ ? 'lib' : 'page'
    end

    private

    # Truthy when some line starts (after optional whitespace) with "<".
    def is_html?(text)
      /^\s*</m =~ text
    end

    # Truthy when some line starts with an at-rule, or with selector-like
    # text followed by "{".
    #
    # The selector pattern accepts the same strings as the nested form
    # ([-\*:\.#_\w]+\s*)+\{ - a selector character followed by any mix of
    # selector characters and whitespace, then "{" - but without the
    # nested quantifier, which backtracks exponentially on long
    # identifier runs that are not followed by "{".
    def is_css?(text)
      /^\s*@/m =~ text or /^\s*[-\*:\.#_\w][-\*:\.#_\w\s]*\{/ =~ text
    end

  end

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# A string paired with optional ANSI foreground/background colour indexes.
# Also adds one foreground method (e.g. #red) and one background method
# (e.g. #red_bg) per colour to both String and ColorString.
class ColorString

  # Colour names; a name's index is the digit used in the escape code.
  @@colors = %w(black red green yellow blue magenta cyan white)

  # Returns the list of supported colour names.
  def self.colors
    @@colors
  end

  # str   - the text to colour.
  # color - foreground colour index (0-7) or nil.
  # bg    - background colour index (0-7) or nil.
  def initialize(str, color=nil, bg=nil)
    @str, @color, @bg = str, color, bg
  end

  # Returns the text wrapped in the escape sequence for the chosen colours
  # followed by a reset, or the bare text when no colour is set.
  def to_s
    return @str if @color.nil? and @bg.nil?
    codes = []
    codes << "3#{@color}" unless @color.nil?
    codes << "4#{@bg}" unless @bg.nil?
    "\e[#{codes.join(';')}m" << @str << "\e[0m"
  end

  def inspect
    "#<ColorString \"#{@str}\", color:#{@color}, bg:#{@bg}>"
  end

  # Delegate every public String method that ColorString instances do not
  # already have to the rendered string. The check is made against
  # instance methods (not against the class object), and blocks are
  # forwarded so block-taking methods such as gsub keep working.
  String.public_instance_methods.each do |m|
    next if method_defined?(m)
    define_method(m) { |*args, &blk| to_s.send(m, *args, &blk) }
  end

  @@colors.each_with_index do |color, i|
    # Build the background method name as a new string: appending to
    # +color+ in place would corrupt @@colors and the method names
    # defined after it.
    bg_name = "#{color}_bg"

    String.class_eval do
      define_method(color) { ColorString.new(self, i) }
      define_method(bg_name) { ColorString.new(self, nil, i) }
    end

    # On an existing ColorString the colour methods update the receiver
    # and return it, so calls can be chained.
    ColorString.class_eval do
      define_method(color) { @color = i; self }
      define_method(bg_name) { @bg = i; self }
    end

  end
end
|
44
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'strenc'
|
2
|
+
|
3
|
+
module XRay
|
4
|
+
module Helper
|
5
|
+
module FileReader
  extend self

  # Reads the file at +path+, converting its content to UTF-8 through
  # String#utf8! (not defined in this module).
  #
  # path - path of the file to read.
  # opt  - accepted but not used by this method.
  #
  # Returns a two-element Array: the content and the encoding reported by
  # String#former_enc, or 'ascii-8bit' when none was recorded.
  # Raises ArgumentError when the file is not readable.
  def readfile(path, opt={})
    unless File.readable?(path)
      raise ArgumentError.new("File is not readable!")
    end

    content = File.read(path).utf8!
    detected = content.former_enc || 'ascii-8bit'
    [content, detected]
  end

end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Encoding helpers added to String: one bang method per known encoding
# (utf8!, gbk!, ...) that re-encodes the receiver in place and records
# which source encoding was used.
class String

  # Candidate source encodings, tried in this order.
  @@encs = %w(ascii utf-8 gb18030 gbk gb2312 cp936 big5)

  def self.encs
    @@encs
  end

  def self.encs=(arr)
    @@encs = arr
  end

  if public_instance_methods.include? :encode!
    # Converts the receiver in place from +from+ to +to+.
    # Returns whether the result is valid in the target encoding.
    def try_convert(to, from)
      encode!(to, from).force_encoding(to)
      valid_encoding?
    end
  else
    # for ruby 1.8
    require 'iconv'
    def try_convert(to, from)
      text = Iconv.new(to, from).iconv(self)
      replace(text) if self =~ /./
    end
  end

  # The source encoding recorded by the last successful enc! call, or nil.
  attr_reader :former_enc

  # Makes the receiver a string in +encoding+ and returns self.
  #
  # When the bytes are already valid in +encoding+ they are only
  # re-labelled. Otherwise each candidate from String.encs is tried as the
  # source encoding until one conversion succeeds; candidates that raise
  # are skipped. #former_enc stays untouched when no candidate works.
  def enc!(encoding)
    if respond_to?(:force_encoding)
      force_encoding(encoding)
      if valid_encoding?
        @former_enc = encoding
        return self
      end
    end

    @@encs.each do |candidate|
      begin
        next unless try_convert(encoding, candidate)
        @former_enc = candidate
        break
      rescue
      end
    end

    self
  end

  # Defines ascii!, utf8!, gb18030!, ... (the encoding name without
  # dashes plus "!"), each delegating to enc!.
  @@encs.each do |tar|
    bang_name = "#{tar.delete('-')}!"
    define_method(bang_name) { enc!(tar) }
    public bang_name
  end

  private :try_convert

end
|
61
|
+
|
62
|
+
|
63
|
+
# Manual smoke test: when this file is run directly, print a fixture
# converted to UTF-8.
if $PROGRAM_NAME == __FILE__
  fixture = File.read('test/fixtures/html/1-1.html')
  puts fixture.utf8!
end
|
data/lib/html/parser.rb
ADDED
@@ -0,0 +1,212 @@
|
|
1
|
+
require_relative '../base_parser'
|
2
|
+
require_relative '../css/parser'
|
3
|
+
require_relative '../js/parser'
|
4
|
+
require_relative 'struct'
|
5
|
+
|
6
|
+
module XRay; module HTML
|
7
|
+
|
8
|
+
# HTML parser that turns source text into element nodes.
#
# The helpers scan, skip, skip_empty, batch, parse_error, parse_warn and
# scanner_pos, as well as @scanner, are not defined in this class
# (presumably inherited from BaseParser - confirm there).
class Parser < BaseParser

  # Token patterns.
  TEXT = /[^<]+/m
  PROP_NAME = %r/\w[-:\w]*/m
  PROP_VALUE = %r/'([^']*)'|"([^"]*)"|([^\s>]+)/m
  PROP = %r/#{PROP_NAME}\s*(?:=\s*#{PROP_VALUE})?/m
  TAG_NAME = /\w[^>\(\)\/\s]*/
  TAG_START = %r/<(#{TAG_NAME})/m
  TAG_END = %r/<\/#{TAG_NAME}\s*>/m
  TAG = %r/#{TAG_START}(\s+#{PROP})*\s*>/m
  SELF_CLOSE_TAG = %r/#{TAG_START}(\s+#{PROP})*\s*\/>/m
  DTD = /\s*<!(doctype)\s+(.*?)>/im
  COMMENT = /<!--(.*?)-->/m

  # Parses +src+; yields the result when a block is given and returns it.
  def self.parse(src, &block)
    doc = new(src).parse
    yield doc if block_given?
    doc
  end

  # Entry point; same as parse_doc.
  def parse
    parse_doc
  end

  # Parses all top-level elements. Returns nil when there are none, the
  # element itself when there is exactly one, and a Document otherwise.
  def parse_doc
    nodes = batch(:parse_element)
    count = nodes.size
    return nil if count == 0
    return nodes[0] if count == 1

    ::XRay::HTML::Document.new( nodes )
  end

  # Parses the next element: a doctype (recognised only once), a comment,
  # a tag or a run of text, tried in that order.
  def parse_element
    if @scanner.check(DTD) && !@dtd_checked
      @dtd_checked = true
      return parse_dtd
    end
    return parse_comment if @scanner.check(COMMENT)
    return parse_tag if @scanner.check(TAG_START)
    return parse_text unless text_end?

    parse_error('Invalid HTML struct')
  end

  def parse_dtd
    node = scan(DTD)
    DTDElement.new(@scanner[2], @scanner[1], node.position)
  end

  def parse_comment
    scan COMMENT
    CommentElement.new(@scanner[1])
  end

  # Collects text until text_end? is truthy. A "<" that does not start
  # anything recognised by text_end? is kept as text, and a warning is
  # issued while the collected text contains "<" or ">".
  def parse_text
    text = ''
    until text_end?
      text << '<' if @scanner.skip(/</)
      text << @scanner.scan(TEXT).to_s

      # TODO: make this detection a rule
      parse_warn "'#{Regexp.last_match}' not escaped" if text =~ /<|>/
    end
    TextElement.new text
  end

  # Dispatches on the kind of tag at the current position.
  def parse_tag
    return parse_dtd_tag if @scanner.check(DTD)
    return parse_self_ending_tag if @scanner.check(SELF_CLOSE_TAG)
    return parse_normal_tag if @scanner.check(TAG)

    parse_error('Invalid HTML struct')
  end

  # Parses properties up to the end of the opening tag (or end of input).
  def parse_properties
    skip_empty
    props = []
    until prop_search_done?
      prop = parse_property
      props << prop if prop
      skip_empty
    end
    props
  end

  # Parses one property; value and separator stay nil when the property
  # has no "=" part.
  def parse_property
    name = parse_prop_name
    value = sep = nil
    if @scanner.check( /\s*=/ )
      skip(/[=]/)
      sep = @scanner.check(/['"]/)
      value = parse_prop_value
    end
    Property.new name, value, sep
  end

  def parse_prop_name
    scan PROP_NAME
  end

  # Returns the value without surrounding quotes (only one of the three
  # capture groups of PROP_VALUE is set).
  def parse_prop_value
    scan PROP_VALUE
    "#{@scanner[1]}#{@scanner[2]}#{@scanner[3]}"
  end

  protected

  def prop_search_done?
    @scanner.check(/\/>|>/) or @scanner.eos?
  end

  # Parses "<tag ...>", its children and the matching end tag.
  # An auto-close tag that is not directly followed by its end tag gets
  # close type :none and no children; otherwise children are parsed until
  # an end tag (or end of input) and the matching end tag is consumed.
  def parse_normal_tag
    skip(/</)
    tag = scan(TAG_NAME)
    prop = parse_properties
    @parsing_script = tag =~ /^script$/i
    skip(/>/)

    scopes << tag.text

    children = []
    ending = nil
    end_tag = begin
      %r(<#{tag.text.sub(/^(?!=\/)/, '\/')}>)i
    rescue
      raise ::XRay::ParseError.new("invalid tag name: #{tag.text}", scanner_pos)
    end

    if auto_close?(tag.text) && !@scanner.check(end_tag)
      close_type = :none
    else
      until @scanner.check(TAG_END) || @scanner.eos?
        child = parse_element
        children << child if child
      end
      # Any error raised while scanning the end tag propagates unchanged.
      ending = scan(end_tag).text
      close_type = :after
    end
    @parsing_script = false

    scopes.pop

    el = Element.new(tag, prop, children, close_type, ending)
    el.scopes = scopes.dup
    el
  end

  # Stack of the names of the tags currently being parsed.
  def scopes
    @scopes ||= []
  end

  def parse_dtd_tag
    scan DTD
  end

  # Parses "<tag ... />".
  def parse_self_ending_tag
    skip(/</)
    tag = scan(TAG_NAME)
    prop = parse_properties
    skip(/\/>/)
    el = Element.new(tag, prop, [], :self)
    el.scopes = scopes.dup
    el
  end

  def auto_close?(tag)
    XRay::HTML::AUTO_CLOSE_TAGS.include?(tag.to_s.downcase)
  end

  # Truthy at end of input; inside a script tag only at "</script>";
  # elsewhere at any opening tag, self-closing tag, end tag or comment.
  def text_end?
    return true if @scanner.eos?

    if @parsing_script
      @scanner.check(/<\/script\s*>/)
    else
      @scanner.check(%r(#{TAG}|#{SELF_CLOSE_TAG}|#{TAG_END}|#{COMMENT}))
    end
  end

end
|
201
|
+
|
202
|
+
|
203
|
+
end; end
|
204
|
+
|
205
|
+
# Manual smoke test: when this file is run directly, parse a few snippets
# and print the resulting markup.
if $PROGRAM_NAME == __FILE__
  parser = XRay::HTML::Parser
  show = proc { |e| puts e.outer_html }

  parser.parse(%q(<div class="info" checked>information</div>), &show)
  parser.parse(%q(<img width="100" height='150' id=img > <center>text</center>)) { |e| puts e.first.outer_html }
  parser.parse(%q(<center><div><div><center>text</center></div></div></center>), &show)
  parser.parse(%q(<center ns:name="value"><div><div><center>text</center></div></div></center>), &show)
  # Mis-nested input: any error it raises is deliberately ignored.
  begin
    parser.parse(%q(<center><div></center></div>), &show)
  rescue
  end
  parser.parse('<br/>', &show)
end
|