fdlint 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (218) hide show
  1. data/Gemfile +8 -0
  2. data/Gemfile.lock +14 -0
  3. data/README.md +68 -0
  4. data/Rakefile +92 -0
  5. data/bin/fdlint +17 -0
  6. data/lib/base_parser.rb +143 -0
  7. data/lib/cmd_runner.rb +145 -0
  8. data/lib/context.rb +31 -0
  9. data/lib/css/parser.rb +186 -0
  10. data/lib/css/reader.rb +30 -0
  11. data/lib/css/rule/check_compression_rule.rb +48 -0
  12. data/lib/css/rule/checklist.rb +45 -0
  13. data/lib/css/struct.rb +111 -0
  14. data/lib/encoding_error.rb +6 -0
  15. data/lib/file_validator.rb +38 -0
  16. data/lib/helper/code_type.rb +50 -0
  17. data/lib/helper/color_string.rb +44 -0
  18. data/lib/helper/file_reader.rb +22 -0
  19. data/lib/helper/strenc.rb +65 -0
  20. data/lib/html/parser.rb +212 -0
  21. data/lib/html/query.rb +96 -0
  22. data/lib/html/rule/check_tag_rule.rb +80 -0
  23. data/lib/html/struct.rb +291 -0
  24. data/lib/js/expr/expr.rb +66 -0
  25. data/lib/js/expr/left_hand.rb +63 -0
  26. data/lib/js/expr/operate.rb +92 -0
  27. data/lib/js/expr/primary.rb +166 -0
  28. data/lib/js/parser.rb +116 -0
  29. data/lib/js/rule/all.rb +35 -0
  30. data/lib/js/rule/checklist.rb +41 -0
  31. data/lib/js/rule/file_checker.rb +42 -0
  32. data/lib/js/rule/helper.rb +96 -0
  33. data/lib/js/rule/no_global.rb +87 -0
  34. data/lib/js/stat/if.rb +25 -0
  35. data/lib/js/stat/iter.rb +85 -0
  36. data/lib/js/stat/stat.rb +117 -0
  37. data/lib/js/stat/switch.rb +65 -0
  38. data/lib/js/stat/try.rb +28 -0
  39. data/lib/js/stat/var.rb +40 -0
  40. data/lib/js/struct.rb +248 -0
  41. data/lib/log_entry.rb +49 -0
  42. data/lib/node.rb +28 -0
  43. data/lib/parse_error.rb +13 -0
  44. data/lib/parser_visitable.rb +138 -0
  45. data/lib/position_info.rb +46 -0
  46. data/lib/printer/base_printer.rb +24 -0
  47. data/lib/printer/console_printer.rb +66 -0
  48. data/lib/printer/nocolor_printer.rb +27 -0
  49. data/lib/printer/vim_printer.rb +19 -0
  50. data/lib/rule.rb +241 -0
  51. data/lib/rule_helper.rb +14 -0
  52. data/lib/runner.rb +225 -0
  53. data/rules.d/css.rule +127 -0
  54. data/rules.d/html.dtd.rule +22 -0
  55. data/rules.d/html.prop.rule +51 -0
  56. data/rules.d/html.tag.rule +136 -0
  57. data/rules.d/js.file.rule +13 -0
  58. data/rules.d/js.jquery.rule +56 -0
  59. data/rules.d/js.mergefile.rule +71 -0
  60. data/rules.d/js.rule +84 -0
  61. data/test/all_tests.rb +84 -0
  62. data/test/cli/cli_test.rb +70 -0
  63. data/test/cli/log_level_test.rb +51 -0
  64. data/test/cli/output_format_test.rb +47 -0
  65. data/test/cli/type_test.rb +77 -0
  66. data/test/css/mac_line_end_support_test.rb +38 -0
  67. data/test/css/parser_test.rb +276 -0
  68. data/test/css/rule/check_encoding_test.rb +66 -0
  69. data/test/css/rule/check_list_rule_test.rb +167 -0
  70. data/test/css/rule/compression_test.rb +53 -0
  71. data/test/css/rule/file_name_test.rb +76 -0
  72. data/test/fixtures/css/broken.css +4 -0
  73. data/test/fixtures/css/cbu/36.css +52 -0
  74. data/test/fixtures/css/cbu/china_top.css +324 -0
  75. data/test/fixtures/css/cbu/default-merge.css +3 -0
  76. data/test/fixtures/css/cbu/default.css +13 -0
  77. data/test/fixtures/css/cbu/diy-merge.css +25 -0
  78. data/test/fixtures/css/cbu/fns-v1.css +27 -0
  79. data/test/fixtures/css/cbu/index_v0.1.css +12 -0
  80. data/test/fixtures/css/cbu/merge.css +11 -0
  81. data/test/fixtures/css/cbu/min.css +2 -0
  82. data/test/fixtures/css/cbu/my_home_admin.css +126 -0
  83. data/test/fixtures/css/cbu/nav.css +95 -0
  84. data/test/fixtures/css/cbu/pic_list.css +386 -0
  85. data/test/fixtures/css/cbu/quote-edit.css +18 -0
  86. data/test/fixtures/css/cbu/selloffer.shopwindow.css +1 -0
  87. data/test/fixtures/css/cbu/v1.css +9 -0
  88. data/test/fixtures/css/css3.css +30 -0
  89. data/test/fixtures/css/empty-min.css +0 -0
  90. data/test/fixtures/css/empty.css +0 -0
  91. data/test/fixtures/css/font-family.css +4 -0
  92. data/test/fixtures/css/gb-good.css +14 -0
  93. data/test/fixtures/css/gb_using_star.css +4 -0
  94. data/test/fixtures/css/import.css +18 -0
  95. data/test/fixtures/css/mac-line-sep-err-min.css +1 -0
  96. data/test/fixtures/css/mac-line-sep-err.css +1 -0
  97. data/test/fixtures/css/mac-line-sep-good-min.css +1 -0
  98. data/test/fixtures/css/mac-line-sep-good.css +1 -0
  99. data/test/fixtures/css/multi-encoding-in-a-file.css +0 -0
  100. data/test/fixtures/css/simple.css +1 -0
  101. data/test/fixtures/css/using_expr.css +8 -0
  102. data/test/fixtures/css/using_hack.css +21 -0
  103. data/test/fixtures/css/using_id.css +1 -0
  104. data/test/fixtures/css/using_star.css +4 -0
  105. data/test/fixtures/css/utf8_good.css +6 -0
  106. data/test/fixtures/css/utf8_good_declaring_charset.css +7 -0
  107. data/test/fixtures/css/utf8_using_star.css +5 -0
  108. data/test/fixtures/html/1-1.html +120 -0
  109. data/test/fixtures/html/1-2.html +120 -0
  110. data/test/fixtures/html/cms.html +373 -0
  111. data/test/fixtures/html/css_out_of_head.html +9 -0
  112. data/test/fixtures/html/fdev-template.html +22 -0
  113. data/test/fixtures/html/google.com.html +33 -0
  114. data/test/fixtures/html/mixed_log_levels.html +4 -0
  115. data/test/fixtures/html/mixed_types.html +13 -0
  116. data/test/fixtures/html/no_dtd.html +6 -0
  117. data/test/fixtures/html/readme.html +94 -0
  118. data/test/fixtures/html/review.board.html +163 -0
  119. data/test/fixtures/html/syntax_err.html +3 -0
  120. data/test/fixtures/html/train/detail/345/233/276/346/226/207/347/273/223/345/220/210.html +208 -0
  121. data/test/fixtures/html/train/detail/347/232/204Flash.html +212 -0
  122. data/test/fixtures/html/train/detail/347/232/204Vedio.html +212 -0
  123. data/test/fixtures/html/train/index.html +37 -0
  124. data/test/fixtures/html/train/test.html +1 -0
  125. data/test/fixtures/html/train//344/277/256/346/224/271/344/270/200/347/272/247/345/210/206/347/261/273.html +112 -0
  126. data/test/fixtures/html/train//344/277/256/346/224/271/345/255/220/345/210/206/347/261/273.html +108 -0
  127. data/test/fixtures/html/train//344/277/256/346/224/271/350/257/276/347/250/213.html +195 -0
  128. data/test/fixtures/html/train//345/215/232/345/256/242/350/256/276/347/275/256.html +142 -0
  129. data/test/fixtures/html/train//346/265/217/350/247/210/350/256/260/345/275/225.html +191 -0
  130. data/test/fixtures/html/train//346/267/273/345/212/240/344/270/200/347/272/247/345/210/206/347/261/273.html +113 -0
  131. data/test/fixtures/html/train//346/267/273/345/212/240/345/255/220/345/210/206/347/261/273.html +112 -0
  132. data/test/fixtures/html/train//346/267/273/345/212/240/350/257/276/347/250/213.html +195 -0
  133. data/test/fixtures/html/train//347/231/273/345/275/225.html +20 -0
  134. data/test/fixtures/html/train//347/256/241/347/220/206/345/210/206/347/261/273.html +210 -0
  135. data/test/fixtures/html/train//347/256/241/347/220/206/345/217/215/351/246/210.html +222 -0
  136. data/test/fixtures/html/train//347/256/241/347/220/206/350/257/276/347/250/213.html +284 -0
  137. data/test/fixtures/html/train//347/256/241/347/220/206/350/264/246/346/210/267.html +107 -0
  138. data/test/fixtures/html/train//347/275/221/344/270/212/345/237/271/350/256/255home/351/241/265.html +354 -0
  139. data/test/fixtures/html/train//347/275/221/345/225/206/345/237/271/350/256/255list/351/241/265.html +255 -0
  140. data/test/fixtures/html/train//350/256/276/347/275/256/351/246/226/351/241/265/346/216/250/350/215/220.html +168 -0
  141. data/test/fixtures/html/train//350/257/264/346/230/216.txt +3 -0
  142. data/test/fixtures/html/train//351/246/226/351/241/265/345/271/277/345/221/212/350/256/276/347/275/256.html +297 -0
  143. data/test/fixtures/html/unescaped.html +2 -0
  144. data/test/fixtures/html/view.vm +916 -0
  145. data/test/fixtures/js/jquery-1.7.js +9300 -0
  146. data/test/fixtures/js/scope-test.js +22 -0
  147. data/test/helper.rb +41 -0
  148. data/test/html/mixed_type_test.rb +35 -0
  149. data/test/html/parser/parse_comment_test.rb +47 -0
  150. data/test/html/parser/parse_dtd_test.rb +46 -0
  151. data/test/html/parser/parse_script_tag_test.rb +55 -0
  152. data/test/html/parser/parse_with_auto_close_tag_test.rb +41 -0
  153. data/test/html/parser/parse_with_diff_case_test.rb +38 -0
  154. data/test/html/parser/parse_with_emtpy_test.rb +22 -0
  155. data/test/html/parser/parse_with_multi_children_test.rb +27 -0
  156. data/test/html/parser/parse_with_multi_line_test.rb +41 -0
  157. data/test/html/parser/parse_with_prop_test.rb +88 -0
  158. data/test/html/parser/parse_with_script_tag_test.rb +26 -0
  159. data/test/html/parser/parse_with_selfclosing_test.rb +39 -0
  160. data/test/html/parser/parse_with_simple_tag_test.rb +44 -0
  161. data/test/html/parser/parse_with_simple_tree_test.rb +40 -0
  162. data/test/html/parser/parse_with_style_tag_test.rb +22 -0
  163. data/test/html/parser/parse_with_text_test.rb +45 -0
  164. data/test/html/parser_test.rb +52 -0
  165. data/test/html/query_test.rb +52 -0
  166. data/test/html/rule/check_block_level_element_test.rb +52 -0
  167. data/test/html/rule/check_button_test.rb +45 -0
  168. data/test/html/rule/check_class_count_test.rb +36 -0
  169. data/test/html/rule/check_css_in_head_test.rb +53 -0
  170. data/test/html/rule/check_dtd_test.rb +46 -0
  171. data/test/html/rule/check_form_element_name_test.rb +49 -0
  172. data/test/html/rule/check_head_contain_meta_and_title_test.rb +52 -0
  173. data/test/html/rule/check_html_template_test.rb +103 -0
  174. data/test/html/rule/check_hyperlink_with_target_test.rb +40 -0
  175. data/test/html/rule/check_hyperlink_with_title_test.rb +43 -0
  176. data/test/html/rule/check_id_n_class_downcase_test.rb +40 -0
  177. data/test/html/rule/check_img_with_alt_prop_test.rb +33 -0
  178. data/test/html/rule/check_no_import_css_test.rb +36 -0
  179. data/test/html/rule/check_prop_have_value_test.rb +32 -0
  180. data/test/html/rule/check_prop_seperator_test.rb +32 -0
  181. data/test/html/rule/check_style_prop_test.rb +30 -0
  182. data/test/html/rule/check_tag_closed_test.rb +59 -0
  183. data/test/html/rule/check_tag_downcase_test.rb +51 -0
  184. data/test/html/rule/check_unescape_char_test.rb +35 -0
  185. data/test/html/rule/check_unique_import_test.rb +56 -0
  186. data/test/html/rule_test.rb +62 -0
  187. data/test/js/expr/expr.rb +57 -0
  188. data/test/js/expr/left_hand.rb +25 -0
  189. data/test/js/expr/operate.rb +145 -0
  190. data/test/js/expr/primary.rb +89 -0
  191. data/test/js/parser_test.rb +98 -0
  192. data/test/js/rule/alert_check_test.rb +37 -0
  193. data/test/js/rule/all_test.rb +23 -0
  194. data/test/js/rule/base_test.rb +34 -0
  195. data/test/js/rule/file_checker_test.rb +131 -0
  196. data/test/js/rule/jq_check_test.rb +90 -0
  197. data/test/js/rule/nest_try_catch_test.rb +71 -0
  198. data/test/js/rule/new_object_and_new_array_test.rb +38 -0
  199. data/test/js/rule/no_eval_test.rb +34 -0
  200. data/test/js/rule/no_global_test.rb +88 -0
  201. data/test/js/rule/private_method_check_test.rb +58 -0
  202. data/test/js/rule/semicolon_test.rb +63 -0
  203. data/test/js/rule/stat_if_with_brace_test.rb +68 -0
  204. data/test/js/rule/stat_if_with_muti_else_test.rb +68 -0
  205. data/test/js/rule/use_strict_equal_test.rb +44 -0
  206. data/test/js/rule_test.rb +47 -0
  207. data/test/js/stat/if.rb +26 -0
  208. data/test/js/stat/iter.rb +115 -0
  209. data/test/js/stat/stat.rb +91 -0
  210. data/test/js/stat/switch.rb +37 -0
  211. data/test/js/stat/try.rb +32 -0
  212. data/test/js/stat/var.rb +38 -0
  213. data/test/parser_visitable_test.rb +102 -0
  214. data/test/position_info_test.rb +66 -0
  215. data/test/rule_dsl/dsl_basic_test.rb +91 -0
  216. data/test/rule_dsl/importing_test.rb +48 -0
  217. data/test/runner/log_level_test.rb +58 -0
  218. metadata +317 -0
data/lib/css/reader.rb ADDED
@@ -0,0 +1,30 @@
1
+ require_relative '../encoding_error'
2
+ require_relative '../helper/file_reader'
3
+
4
+ module XRay
5
+ module CSS
6
+
7
# Reads a CSS file and checks that the encoding detected for its bytes agrees
# with the @charset declaration (if any) found on the file's first line.
class Reader

  # Brings FileReader into constant-lookup scope for the class methods below.
  include XRay::Helper

  # Reads +file+ and returns its source text.
  #
  # file - path of the CSS file to read.
  # opt  - accepted for interface compatibility; not used by this method.
  #
  # Raises EncodingError when the first line declares a @charset that differs
  # from the encoding reported by FileReader.readfile. Encoding names are
  # compared case-insensitively, so a declaration such as "UTF-8" matches a
  # detected 'utf-8'.
  def self.read( file, opt = {} )
    source, enc = FileReader::readfile(file)
    declare = get_encoding_declaration(file)
    if declare && enc.to_s.downcase != declare.downcase
      raise EncodingError.new
    end
    source
  end

  # Extracts the encoding name from a `@charset "..."` declaration on the
  # first line of +file+.
  #
  # Returns the declared name as written in the file, or nil when there is no
  # declaration or when the first line cannot be read or matched (empty file,
  # missing file, undecodable bytes). This is deliberately best-effort.
  def self.get_encoding_declaration( file )
    first_line = File.open(file, &:readline)
    first_line[/@charset\s+(['"])(.*?)\1/, 2]
  rescue StandardError
    nil
  end
end
27
+
28
+ end
29
+
30
+ end
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+ require_relative '../../log_entry'
3
+
4
+ module XRay
5
+ module CSS
6
+ module Rule
7
+
8
# File-level check that a CSS source file is accompanied by its minified
# counterpart (`a.css` -> `a-min.css`) in the same directory.
class CompressionChecker

  # A file counts as minified/merged only when `min.css` / `merge.css` is the
  # whole file name or follows a separator (`-`, `_`, `.`, `/`, `\`).
  # A bare suffix test would also accept names such as `admin.css` or
  # `submerge.css`, silently exempting them from the check.
  MIN_FILE   = /(?:\A|[-_.\/\\])min\.css\z/
  MERGE_FILE = /(?:\A|[-_.\/\\])merge\.css\z/

  # opt - options hash; a shallow copy is stored and not otherwise read here.
  def initialize( opt={} )
    @opt = opt.dup
  end

  # Runs every file-level check against +name+.
  #
  # Returns an Array of LogEntry objects (empty when nothing is wrong).
  def check_file( name )
    check_items([:has_minified_in_same_folder], name)
  end

  # Returns a one-element Array holding an error LogEntry when +name+ is a
  # plain source file with no `-min.css` sibling on disk; returns nil for
  # minified files, merge files, and sources whose sibling exists.
  def check_has_minified_in_same_folder( name )
    return if is_min_file?(name) || is_merge_file?(name)
    return if File.exist?(name.sub(/\.css$/, '-min.css'))

    [LogEntry.new('发布上线的文件需要压缩,命名规则如a.js->a-min.js,且两者在同一目录下', :error)]
  end

  protected

  # Calls check_<item>(name) for each item and collects the Array results;
  # non-Array results (such as nil) are ignored.
  def check_items( items, name )
    items.each_with_object([]) do |item, results|
      found = send(:"check_#{item}", name)
      results.concat(found) if found.is_a?(Array)
    end
  end

  # Truthy when +name+ denotes a merge file (see MERGE_FILE).
  def is_merge_file?( name )
    name =~ MERGE_FILE
  end

  # Truthy when +name+ denotes an already minified file (see MIN_FILE).
  def is_min_file?( name )
    name =~ MIN_FILE
  end

end
45
+
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../../rule'
4
+ require_relative '../../context'
5
+
6
+ module XRay
7
+ module CSS
8
+ module Rule
9
+
10
# Visitor for parsed CSS nodes: each visit_* callback hands its node to the
# corresponding check_* method.
#
# NOTE(review): the check_* methods are not defined in this class; presumably
# they come from the included ::XRay::Rule / Context modules - confirm there.
class CheckListRule

  attr_reader :options

  include ::XRay::Rule, Context

  # options - arbitrary options hash, exposed unchanged through #options.
  def initialize(options = {})
    @options = options
  end

  # Called for every simple selector node.
  def visit_simple_selector(selector)
    check_css_selector(selector)
  end

  # Called for every declaration (property/value pair) node.
  def visit_declaration(dec)
    check_declaration(dec)
  end

  # Called for every rule set node.
  def visit_ruleset(ruleset)
    check_css_ruleset(ruleset)
  end

  # Called for every property node.
  def visit_property(property)
    check_css_property(property)
  end

  # Called for every value node.
  def visit_value(value)
    check_css_value(value)
  end

end
42
+
43
+ end
44
+ end
45
+ end
data/lib/css/struct.rb ADDED
@@ -0,0 +1,111 @@
1
+ require_relative '../node'
2
+
3
+ module XRay
4
+ module CSS
5
+ Node = XRay::Node
6
+
7
# Root node of a parsed style sheet: a list of statements, each of which is
# either a Directive or a RuleSet.
class StyleSheet < Node
  attr_reader :statements

  # statements - collection of Directive / RuleSet nodes.
  def initialize(statements)
    @statements = statements
  end

  # Text of all rule sets joined by newlines (directives are not included).
  def text
    rulesets.map { |ruleset| ruleset.text }.join("\n")
  end

  # Position of the first rule set, or nil when there is none.
  def position
    leading = rulesets[0]
    leading ? leading.position : nil
  end

  # All statements that are Directive nodes.
  def directives
    statements.select { |statement| statement.is_a?(Directive) }
  end

  # All statements that are RuleSet nodes.
  def rulesets
    statements.select { |statement| statement.is_a?(RuleSet) }
  end

  alias_method :at_rules, :directives
end
32
+
33
# An at-rule: `@keyword`, an optional expression and an optional block.
class Directive < Node
  attr_reader :keyword, :expression, :block

  # keyword    - node for the directive name; must respond to #position.
  # expression - text following the keyword, or nil.
  # block      - block content, or nil for a `;`-terminated directive.
  def initialize(keyword, expression, block = nil)
    @keyword = keyword
    @expression = expression
    @block = block
  end

  # Renders the directive. The expression is appended directly after the
  # keyword; a block is wrapped in braces, otherwise a `;` terminates it.
  def text
    pieces = ["@#{keyword}"]
    pieces << "#{expression}" if expression
    pieces << (block ? "{\n#{block}\n}\n" : ';')
    pieces.join
  end

  # Position of the keyword node.
  def position
    keyword.position
  end
end
57
+
58
# A selector together with the declarations of its `{ ... }` body.
class RuleSet < Node
  attr_reader :selector, :declarations

  # selector     - selector node; must respond to #position.
  # declarations - collection of declaration nodes.
  def initialize(selector, declarations)
    @selector = selector
    @declarations = declarations
  end

  # Renders the rule set with one declaration per line, indented by four
  # spaces and terminated by `;`.
  def text
    indent = ' ' * 4
    body = declarations.map { |dec| "#{indent}#{dec};" }.join("\n")

    "#{selector} {\n#{body}\n}"
  end

  # Position of the selector node.
  def position
    selector.position
  end
end
77
+
78
# A comma-separated group of simple selectors.
class Selector < Node
  attr_reader :simple_selectors

  # simple_selectors - collection of simple selector nodes.
  def initialize(simple_selectors)
    @simple_selectors = simple_selectors
  end

  # The simple selectors' texts joined by ", ".
  def text
    @simple_selectors.map { |simple| simple.text }.join(', ')
  end

  # Position of the first simple selector, or nil when the group is empty.
  def position
    return nil if simple_selectors.empty?

    simple_selectors[0].position
  end
end
93
+
94
# A single `property: value` pair inside a rule set.
class Declaration < Node
  attr_reader :property, :value

  # property - property node; must respond to #position.
  # value    - value node.
  def initialize(property, value)
    @property = property
    @value = value
  end

  # Renders the pair as "property: value" (no trailing semicolon).
  def text
    [property, value].join(': ')
  end

  # Position of the property node.
  def position
    property.position
  end
end
109
+
110
+ end
111
+ end
@@ -0,0 +1,6 @@
1
module XRay

  # Error type for encoding problems. It is namespaced under XRay and is a
  # RuntimeError, so it is distinct from Ruby's core ::EncodingError.
  class EncodingError < RuntimeError; end

end
@@ -0,0 +1,38 @@
1
+ module XRay
2
+
3
# Runs a list of validators against a file and gathers their results.
class FileValidator
  # options - stored as given; not read by the methods of this class.
  def initialize( options )
    @options = options
    @validators = []
  end

  # Registers one validator.
  def add_validator( val )
    @validators.push(val)
  end

  # Registers every validator in +vals+.
  def add_validators( vals)
    vals.each(&method(:add_validator))
  end

  # Calls check_file(file) on each registered validator that responds to it.
  #
  # Array results are appended element by element, any other truthy result
  # is appended as a single element, and nil/false results are skipped.
  #
  # Returns the flat Array of collected results.
  def check( file )
    @validators.each_with_object([]) do |validator, results|
      next unless validator.respond_to?(:check_file)

      outcome = validator.check_file(file)
      next unless outcome

      if outcome.is_a?(Array)
        results.concat(outcome)
      else
        results << outcome
      end
    end
  end

  alias_method :validate, :check

end
37
+
38
+ end
@@ -0,0 +1,50 @@
1
# Heuristics for deciding whether a piece of source is CSS, JavaScript or
# HTML, based on the file name when one is given and on the content otherwise.
class CodeType

  class << self

    public

    # Guesses the code type of +text+.
    #
    # text     - source text; only inspected when no usable file name is given.
    # filename - optional file name; when non-empty it decides the result.
    #
    # Returns :css, :js or :html.
    def guess(text, filename=nil)
      if filename && !filename.empty?
        guess_by_name(filename)
      else
        guess_by_content(text)
      end
    end

    # Maps the extension of +filename+ to a code type: `.css` -> :css,
    # `.js` -> :js (both case-insensitive), anything else -> :html.
    def guess_by_name( filename )
      case File.extname( filename )
      when /\.css$/i then :css
      when /\.js$/i  then :js
      else
        :html #TODO: support more suffix
      end
    end

    # Guesses from content alone: HTML is tried first, then CSS; everything
    # else is taken to be JavaScript.
    def guess_by_content(text)
      return :html if is_html?(text)
      return :css if is_css?(text)
      :js #TODO: support more code syntaxes
    end

    # Truthy when the extension of +filename+ ends with css, js, htm or html
    # (case-insensitive). The pattern is only anchored at the end, so an
    # extension such as `.scss` matches as well.
    def is_style_file?(filename)
      File.extname( filename ) =~ /(css|js|html?)$/i
    end

    # Returns 'lib' when the path contains a `lib` directory component
    # (with either `/` or `\` separators), otherwise 'page'.
    def scope(filename)
      filename =~ /[\\\/]lib[\\\/]/ ? 'lib' : 'page'
    end

    private

    # Truthy when some line of +text+ starts (after optional whitespace)
    # with `<`.
    def is_html?(text)
      /^\s*</m =~ text
    end

    # Truthy when some line starts with `@`, or with one or more
    # selector-like words followed by `{`.
    #
    # The selector pattern accepts exactly the same strings as
    # `([-\*:\.#_\w]+\s*)+\{`, but requires whitespace between repeated
    # words. Without that, a long run of word characters with no `{` can be
    # split in exponentially many ways while backtracking.
    def is_css?(text)
      /^\s*@/m =~ text or
        /^\s*[-\*:\.#_\w]+(?:\s+[-\*:\.#_\w]+)*\s*\{/ =~ text
    end

  end

end
@@ -0,0 +1,44 @@
1
# A string that carries an ANSI foreground and/or background color and
# renders itself wrapped in the matching escape sequences.
#
# Loading this class also adds one method per color name (`red`, `red_bg`,
# ...) to String; each returns a ColorString wrapping the receiver.
class ColorString

  # Color names in ANSI order; the index of a name is its color digit.
  @@colors = %w(black red green yellow blue magenta cyan white)

  # Returns the list of supported color names.
  def self.colors
    @@colors
  end

  # str   - the text to colorize.
  # color - foreground color index (0-7), or nil for none.
  # bg    - background color index (0-7), or nil for none.
  def initialize(str, color=nil, bg=nil)
    @str, @color, @bg = str, color, bg
  end

  # Returns the wrapped text itself when no color is set; otherwise the text
  # surrounded by "\e[3<fg>;4<bg>m" and the reset sequence "\e[0m".
  def to_s
    return @str if @color.nil? && @bg.nil?

    codes = []
    codes << "3#{@color}" unless @color.nil?
    codes << "4#{@bg}" unless @bg.nil?
    "\e[#{codes.join(';')}m#{@str}\e[0m"
  end

  def inspect
    "#<ColorString \"#{@str}\", color:#{@color}, bg:#{@bg}>"
  end

  # Delegate every public String method that ColorString instances do not
  # already have to the rendered string. The guard must look at instance
  # methods: testing the class object itself would skip methods that Class
  # happens to respond to (include?, <, >, ...). Blocks are forwarded so
  # that iterators such as gsub or each_char work.
  String.public_instance_methods.each do |m|
    next if method_defined?(m)

    define_method(m) { |*args, &blk| to_s.public_send(m, *args, &blk) }
  end

  @@colors.each_with_index do |color, i|
    # Build the background method name as a new string; appending to +color+
    # in place would corrupt the entries of @@colors and the names of the
    # methods defined afterwards.
    bg_name = "#{color}_bg"

    # "text".red / "text".red_bg start a new ColorString.
    String.class_eval do
      define_method(color)   { ColorString.new(self, i) }
      define_method(bg_name) { ColorString.new(self, nil, i) }
    end

    # On an existing ColorString the same names update the color in place
    # and return self, so calls can be chained: "x".red.yellow_bg.
    define_method(color)   { @color = i; self }
    define_method(bg_name) { @bg = i; self }
  end
end
44
+
@@ -0,0 +1,22 @@
1
+ require_relative 'strenc'
2
+
3
+ module XRay
4
+ module Helper
5
module FileReader
  extend self

  # Reads the file at +path+ and converts its content to UTF-8 via
  # String#utf8!.
  #
  # path - path of the file to read.
  # opt  - accepted but not used by this method.
  #
  # Returns a two-element Array: the content, and the encoding reported by
  # the content's #former_enc ('ascii-8bit' when that is nil).
  # Raises ArgumentError when the file is not readable.
  def readfile(path, opt={})
    raise ArgumentError.new("File is not readable!") unless File.readable?(path)

    content = File.read(path).utf8!
    [content, content.former_enc || 'ascii-8bit']
  end

end
21
+ end
22
+ end
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+
3
# Encoding detection/conversion helpers added to String.
#
# For every name in String.encs a bang method is generated (utf8!, gbk!,
# gb18030!, ...) that converts the receiver in place to that encoding and
# records, in #former_enc, the encoding the bytes were found to be in.
class String

  # Candidate source encodings, tried in this order.
  @@encs = %w(ascii utf-8 gb18030 gbk gb2312 cp936 big5)

  # Returns the list of candidate encodings.
  def self.encs; @@encs; end

  # Replaces the list of candidate encodings used by #enc!.
  def self.encs=(arr); @@encs = arr; end

  if public_instance_methods.include? :encode!
    # Transcodes the receiver in place from +from+ to +to+.
    # Returns whether the result is valid in the target encoding; raises
    # when the bytes cannot be transcoded.
    def try_convert to, from
      encode!( to, from ).force_encoding( to )
      valid_encoding?
    end
  else
    # for ruby 1.8
    require 'iconv'
    def try_convert to, from
      text = Iconv.new(to, from).iconv(self)
      replace(text) if self =~ /./
    end
  end

  # Encoding the bytes were detected as by the most recent #enc! call, or
  # nil when that call could not determine one.
  attr_reader :former_enc

  # Converts the receiver in place to +encoding+.
  #
  # If the bytes are already valid in +encoding+ they are only re-tagged and
  # former_enc is set to +encoding+. Otherwise each candidate in String.encs
  # is tried as the source encoding; the first successful conversion wins and
  # is recorded in former_enc.
  #
  # Returns self in every case.
  def enc!(encoding)
    # Forget the outcome of any earlier call so that a failed detection is
    # reported as nil instead of a stale encoding name.
    @former_enc = nil

    if respond_to? :force_encoding
      force_encoding encoding
      if valid_encoding?
        @former_enc = encoding
        return self
      end
    end

    @@encs.each do |candidate|
      begin
        if send :try_convert, encoding, candidate
          @former_enc = candidate
          break
        end
      rescue StandardError
        # Bytes are not valid in this candidate encoding; try the next one.
      end
    end
    self
  end

  # Generate utf8!, gbk!, ... ("-" is dropped from the encoding name).
  @@encs.each do |tar|
    mtd = tar.gsub(/-/, '') << "!"
    define_method mtd do
      enc! tar
    end
    public mtd
  end

  private :try_convert

end
61
+
62
+
63
# Manual smoke test, run only when this file is executed directly: prints a
# fixture file after converting it to UTF-8.
puts File.read('test/fixtures/html/1-1.html').utf8! if __FILE__ == $PROGRAM_NAME
@@ -0,0 +1,212 @@
1
+ require_relative '../base_parser'
2
+ require_relative '../css/parser'
3
+ require_relative '../js/parser'
4
+ require_relative 'struct'
5
+
6
+ module XRay; module HTML
7
+
8
# HTML parser producing a tree of DTDElement, CommentElement, TextElement
# and Element nodes.
#
# NOTE(review): scan, skip, skip_empty, batch, parse_error, parse_warn,
# scanner_pos and @scanner are not defined in this class; presumably they are
# provided by BaseParser - confirm there.
class Parser < BaseParser

  # Parses +src+ and returns the resulting document node. When a block is
  # given the document is also yielded to it.
  def self.parse(src, &block)
    doc = new(src).parse
    yield doc if block_given?
    doc
  end

  TEXT = /[^<]+/m
  PROP_NAME = %r/\w[-:\w]*/m
  PROP_VALUE = %r/'([^']*)'|"([^"]*)"|([^\s>]+)/m
  PROP = %r/#{PROP_NAME}\s*(?:=\s*#{PROP_VALUE})?/m
  TAG_NAME = /\w[^>\(\)\/\s]*/
  TAG_START = %r/<(#{TAG_NAME})/m
  TAG_END = %r/<\/#{TAG_NAME}\s*>/m
  TAG = %r/#{TAG_START}(\s+#{PROP})*\s*>/m
  SELF_CLOSE_TAG = %r/#{TAG_START}(\s+#{PROP})*\s*\/>/m
  DTD = /\s*<!(doctype)\s+(.*?)>/im
  COMMENT = /<!--(.*?)-->/m

  # Entry point; same as #parse_doc.
  def parse
    parse_doc
  end

  # Parses all top-level elements.
  #
  # Returns nil for no elements, the element itself when there is exactly
  # one, and a Document wrapping them otherwise.
  def parse_doc
    nodes = batch(:parse_element)
    count = nodes.size
    if count == 0
      nil
    elsif count == 1
      nodes[0]
    else
      ::XRay::HTML::Document.new( nodes )
    end
  end

  # Parses the next node: a doctype (first occurrence only), a comment, a
  # tag, or text. Reports a parse error when none of them applies.
  def parse_element
    if @scanner.check(DTD) && !@dtd_checked
      @dtd_checked = true
      return parse_dtd
    end
    return parse_comment if @scanner.check(COMMENT)
    return parse_tag if @scanner.check(TAG_START)
    return parse_text unless text_end?

    parse_error('Invalid HTML struct')
  end

  # Consumes a doctype declaration and builds a DTDElement from its body
  # (capture 2), its keyword (capture 1) and the scanned node's position.
  def parse_dtd
    scanned = scan(DTD)
    DTDElement.new(@scanner[2], @scanner[1], scanned.position)
  end

  # Consumes a comment and wraps its inner text in a CommentElement.
  def parse_comment
    scan(COMMENT)
    CommentElement.new(@scanner[1])
  end

  # Consumes text up to the next tag/comment boundary (see #text_end?).
  # A `<` that does not start a recognised construct is kept as text, and a
  # warning is issued whenever the collected text contains `<` or `>`.
  def parse_text
    buffer = ''
    until text_end?
      buffer << '<' if @scanner.skip(/</)
      buffer << "#{@scanner.scan(TEXT)}"

      # TODO: make this detection a rule
      parse_warn "'#{Regexp.last_match}' not escaped" if buffer =~ /<|>/
    end
    TextElement.new(buffer)
  end

  # Dispatches on the kind of tag found at the scan position.
  def parse_tag
    return parse_dtd_tag if @scanner.check(DTD)
    return parse_self_ending_tag if @scanner.check(SELF_CLOSE_TAG)
    return parse_normal_tag if @scanner.check(TAG)

    parse_error('Invalid HTML struct')
  end

  # Parses properties until the end of the opening tag (or end of input).
  # Returns an Array of the parsed properties.
  def parse_properties
    skip_empty
    collected = []
    until prop_search_done?
      property = parse_property
      collected << property if property
      skip_empty
    end
    collected
  end

  # Parses one property. Value and separator (the opening quote character,
  # if any) are nil for a property without `=`.
  def parse_property
    name = parse_prop_name
    value = sep = nil
    if @scanner.check( /\s*=/ )
      skip(/[=]/)
      sep = @scanner.check(/['"]/)
      value = parse_prop_value
    end
    Property.new(name, value, sep)
  end

  def parse_prop_name
    scan(PROP_NAME)
  end

  # Consumes a property value and returns it without surrounding quotes;
  # only one of the three captures is set for any given match.
  def parse_prop_value
    scan(PROP_VALUE)
    "#{@scanner[1]}#{@scanner[2]}#{@scanner[3]}"
  end

  protected

  # Truthy at `/>`, `>` or end of input.
  def prop_search_done?
    @scanner.check(/\/>|>/) or @scanner.eos?
  end

  # Parses `<tag ...>children</tag>`.
  #
  # Tags listed in AUTO_CLOSE_TAGS that are not immediately followed by their
  # own end tag get close type :none and no children. For all other tags,
  # children are parsed until any end tag is seen, after which this tag's own
  # end tag must follow (close type :after).
  def parse_normal_tag
    skip(/</)
    tag = scan(TAG_NAME)
    prop = parse_properties
    @parsing_script = tag =~ /^script$/i
    skip(/>/)

    scopes << tag.text

    children = []
    ending = nil
    begin
      end_tag = %r(<#{tag.text.sub(/^(?!=\/)/, '\/')}>)i
    rescue
      # The tag name is interpolated unescaped, so it can form a bad pattern.
      raise ::XRay::ParseError.new("invalid tag name: #{tag.text}", scanner_pos)
    end

    if auto_close?(tag.text) && !@scanner.check(end_tag)
      close_type = :none
    else
      until @scanner.check(TAG_END) || @scanner.eos?
        child = parse_element
        children << child if child
      end
      ending = scan(end_tag).text
      close_type = :after
    end
    @parsing_script = false

    scopes.pop

    Element.new(tag, prop, children, close_type, ending).tap do |el|
      el.scopes = scopes.dup
    end
  end

  # Stack of the names of the tags currently being parsed.
  def scopes
    @scopes ||= []
  end

  def parse_dtd_tag
    scan(DTD)
  end

  # Parses `<tag ... />` into a childless Element with close type :self.
  def parse_self_ending_tag
    skip(/</)
    tag = scan(TAG_NAME)
    prop = parse_properties
    skip(/\/>/)
    Element.new(tag, prop, [], :self).tap do |el|
      el.scopes = scopes.dup
    end
  end

  # Whether +tag+ (compared in lower case) is listed in AUTO_CLOSE_TAGS.
  def auto_close?(tag)
    XRay::HTML::AUTO_CLOSE_TAGS.include?(tag.to_s.downcase)
  end

  # Truthy when the current text run ends here: at end of input, at
  # `</script>` while inside a script element, or otherwise at any opening
  # tag, self-closing tag, end tag or comment.
  def text_end?
    return true if @scanner.eos?

    boundary =
      if @parsing_script
        /<\/script\s*>/
      else
        %r(#{TAG}|#{SELF_CLOSE_TAG}|#{TAG_END}|#{COMMENT})
      end
    @scanner.check(boundary)
  end

end
201
+
202
+
203
+ end; end
204
+
205
# Manual demo, run only when this file is executed directly: parses a few
# snippets and prints their outer HTML.
if __FILE__ == $PROGRAM_NAME
  show = lambda do |html|
    XRay::HTML::Parser.parse(html) { |doc| puts doc.outer_html }
  end

  show.call(%q(<div class="info" checked>information</div>))
  XRay::HTML::Parser.parse(%q(<img width="100" height='150' id=img > <center>text</center>)) { |doc| puts doc.first.outer_html }
  show.call(%q(<center><div><div><center>text</center></div></div></center>))
  show.call(%q(<center ns:name="value"><div><div><center>text</center></div></div></center>))
  begin
    # Mis-nested end tags; any error raised here is deliberately ignored.
    show.call(%q(<center><div></center></div>))
  rescue
  end
  show.call('<br/>')
end