fdlint 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. data/Gemfile +8 -0
  2. data/Gemfile.lock +14 -0
  3. data/README.md +68 -0
  4. data/Rakefile +92 -0
  5. data/bin/fdlint +17 -0
  6. data/lib/base_parser.rb +143 -0
  7. data/lib/cmd_runner.rb +145 -0
  8. data/lib/context.rb +31 -0
  9. data/lib/css/parser.rb +186 -0
  10. data/lib/css/reader.rb +30 -0
  11. data/lib/css/rule/check_compression_rule.rb +48 -0
  12. data/lib/css/rule/checklist.rb +45 -0
  13. data/lib/css/struct.rb +111 -0
  14. data/lib/encoding_error.rb +6 -0
  15. data/lib/file_validator.rb +38 -0
  16. data/lib/helper/code_type.rb +50 -0
  17. data/lib/helper/color_string.rb +44 -0
  18. data/lib/helper/file_reader.rb +22 -0
  19. data/lib/helper/strenc.rb +65 -0
  20. data/lib/html/parser.rb +212 -0
  21. data/lib/html/query.rb +96 -0
  22. data/lib/html/rule/check_tag_rule.rb +80 -0
  23. data/lib/html/struct.rb +291 -0
  24. data/lib/js/expr/expr.rb +66 -0
  25. data/lib/js/expr/left_hand.rb +63 -0
  26. data/lib/js/expr/operate.rb +92 -0
  27. data/lib/js/expr/primary.rb +166 -0
  28. data/lib/js/parser.rb +116 -0
  29. data/lib/js/rule/all.rb +35 -0
  30. data/lib/js/rule/checklist.rb +41 -0
  31. data/lib/js/rule/file_checker.rb +42 -0
  32. data/lib/js/rule/helper.rb +96 -0
  33. data/lib/js/rule/no_global.rb +87 -0
  34. data/lib/js/stat/if.rb +25 -0
  35. data/lib/js/stat/iter.rb +85 -0
  36. data/lib/js/stat/stat.rb +117 -0
  37. data/lib/js/stat/switch.rb +65 -0
  38. data/lib/js/stat/try.rb +28 -0
  39. data/lib/js/stat/var.rb +40 -0
  40. data/lib/js/struct.rb +248 -0
  41. data/lib/log_entry.rb +49 -0
  42. data/lib/node.rb +28 -0
  43. data/lib/parse_error.rb +13 -0
  44. data/lib/parser_visitable.rb +138 -0
  45. data/lib/position_info.rb +46 -0
  46. data/lib/printer/base_printer.rb +24 -0
  47. data/lib/printer/console_printer.rb +66 -0
  48. data/lib/printer/nocolor_printer.rb +27 -0
  49. data/lib/printer/vim_printer.rb +19 -0
  50. data/lib/rule.rb +241 -0
  51. data/lib/rule_helper.rb +14 -0
  52. data/lib/runner.rb +225 -0
  53. data/rules.d/css.rule +127 -0
  54. data/rules.d/html.dtd.rule +22 -0
  55. data/rules.d/html.prop.rule +51 -0
  56. data/rules.d/html.tag.rule +136 -0
  57. data/rules.d/js.file.rule +13 -0
  58. data/rules.d/js.jquery.rule +56 -0
  59. data/rules.d/js.mergefile.rule +71 -0
  60. data/rules.d/js.rule +84 -0
  61. data/test/all_tests.rb +84 -0
  62. data/test/cli/cli_test.rb +70 -0
  63. data/test/cli/log_level_test.rb +51 -0
  64. data/test/cli/output_format_test.rb +47 -0
  65. data/test/cli/type_test.rb +77 -0
  66. data/test/css/mac_line_end_support_test.rb +38 -0
  67. data/test/css/parser_test.rb +276 -0
  68. data/test/css/rule/check_encoding_test.rb +66 -0
  69. data/test/css/rule/check_list_rule_test.rb +167 -0
  70. data/test/css/rule/compression_test.rb +53 -0
  71. data/test/css/rule/file_name_test.rb +76 -0
  72. data/test/fixtures/css/broken.css +4 -0
  73. data/test/fixtures/css/cbu/36.css +52 -0
  74. data/test/fixtures/css/cbu/china_top.css +324 -0
  75. data/test/fixtures/css/cbu/default-merge.css +3 -0
  76. data/test/fixtures/css/cbu/default.css +13 -0
  77. data/test/fixtures/css/cbu/diy-merge.css +25 -0
  78. data/test/fixtures/css/cbu/fns-v1.css +27 -0
  79. data/test/fixtures/css/cbu/index_v0.1.css +12 -0
  80. data/test/fixtures/css/cbu/merge.css +11 -0
  81. data/test/fixtures/css/cbu/min.css +2 -0
  82. data/test/fixtures/css/cbu/my_home_admin.css +126 -0
  83. data/test/fixtures/css/cbu/nav.css +95 -0
  84. data/test/fixtures/css/cbu/pic_list.css +386 -0
  85. data/test/fixtures/css/cbu/quote-edit.css +18 -0
  86. data/test/fixtures/css/cbu/selloffer.shopwindow.css +1 -0
  87. data/test/fixtures/css/cbu/v1.css +9 -0
  88. data/test/fixtures/css/css3.css +30 -0
  89. data/test/fixtures/css/empty-min.css +0 -0
  90. data/test/fixtures/css/empty.css +0 -0
  91. data/test/fixtures/css/font-family.css +4 -0
  92. data/test/fixtures/css/gb-good.css +14 -0
  93. data/test/fixtures/css/gb_using_star.css +4 -0
  94. data/test/fixtures/css/import.css +18 -0
  95. data/test/fixtures/css/mac-line-sep-err-min.css +1 -0
  96. data/test/fixtures/css/mac-line-sep-err.css +1 -0
  97. data/test/fixtures/css/mac-line-sep-good-min.css +1 -0
  98. data/test/fixtures/css/mac-line-sep-good.css +1 -0
  99. data/test/fixtures/css/multi-encoding-in-a-file.css +0 -0
  100. data/test/fixtures/css/simple.css +1 -0
  101. data/test/fixtures/css/using_expr.css +8 -0
  102. data/test/fixtures/css/using_hack.css +21 -0
  103. data/test/fixtures/css/using_id.css +1 -0
  104. data/test/fixtures/css/using_star.css +4 -0
  105. data/test/fixtures/css/utf8_good.css +6 -0
  106. data/test/fixtures/css/utf8_good_declaring_charset.css +7 -0
  107. data/test/fixtures/css/utf8_using_star.css +5 -0
  108. data/test/fixtures/html/1-1.html +120 -0
  109. data/test/fixtures/html/1-2.html +120 -0
  110. data/test/fixtures/html/cms.html +373 -0
  111. data/test/fixtures/html/css_out_of_head.html +9 -0
  112. data/test/fixtures/html/fdev-template.html +22 -0
  113. data/test/fixtures/html/google.com.html +33 -0
  114. data/test/fixtures/html/mixed_log_levels.html +4 -0
  115. data/test/fixtures/html/mixed_types.html +13 -0
  116. data/test/fixtures/html/no_dtd.html +6 -0
  117. data/test/fixtures/html/readme.html +94 -0
  118. data/test/fixtures/html/review.board.html +163 -0
  119. data/test/fixtures/html/syntax_err.html +3 -0
  120. data/test/fixtures/html/train/detail图文结合.html +208 -0
  121. data/test/fixtures/html/train/detail的Flash.html +212 -0
  122. data/test/fixtures/html/train/detail的Vedio.html +212 -0
  123. data/test/fixtures/html/train/index.html +37 -0
  124. data/test/fixtures/html/train/test.html +1 -0
  125. data/test/fixtures/html/train/修改一级分类.html +112 -0
  126. data/test/fixtures/html/train/修改子分类.html +108 -0
  127. data/test/fixtures/html/train/修改课程.html +195 -0
  128. data/test/fixtures/html/train/博客设置.html +142 -0
  129. data/test/fixtures/html/train/浏览记录.html +191 -0
  130. data/test/fixtures/html/train/添加一级分类.html +113 -0
  131. data/test/fixtures/html/train/添加子分类.html +112 -0
  132. data/test/fixtures/html/train/添加课程.html +195 -0
  133. data/test/fixtures/html/train/登录.html +20 -0
  134. data/test/fixtures/html/train/管理分类.html +210 -0
  135. data/test/fixtures/html/train/管理反馈.html +222 -0
  136. data/test/fixtures/html/train/管理课程.html +284 -0
  137. data/test/fixtures/html/train/管理账户.html +107 -0
  138. data/test/fixtures/html/train/网上培训home页.html +354 -0
  139. data/test/fixtures/html/train/网商培训list页.html +255 -0
  140. data/test/fixtures/html/train/设置首页推荐.html +168 -0
  141. data/test/fixtures/html/train/说明.txt +3 -0
  142. data/test/fixtures/html/train/首页广告设置.html +297 -0
  143. data/test/fixtures/html/unescaped.html +2 -0
  144. data/test/fixtures/html/view.vm +916 -0
  145. data/test/fixtures/js/jquery-1.7.js +9300 -0
  146. data/test/fixtures/js/scope-test.js +22 -0
  147. data/test/helper.rb +41 -0
  148. data/test/html/mixed_type_test.rb +35 -0
  149. data/test/html/parser/parse_comment_test.rb +47 -0
  150. data/test/html/parser/parse_dtd_test.rb +46 -0
  151. data/test/html/parser/parse_script_tag_test.rb +55 -0
  152. data/test/html/parser/parse_with_auto_close_tag_test.rb +41 -0
  153. data/test/html/parser/parse_with_diff_case_test.rb +38 -0
  154. data/test/html/parser/parse_with_emtpy_test.rb +22 -0
  155. data/test/html/parser/parse_with_multi_children_test.rb +27 -0
  156. data/test/html/parser/parse_with_multi_line_test.rb +41 -0
  157. data/test/html/parser/parse_with_prop_test.rb +88 -0
  158. data/test/html/parser/parse_with_script_tag_test.rb +26 -0
  159. data/test/html/parser/parse_with_selfclosing_test.rb +39 -0
  160. data/test/html/parser/parse_with_simple_tag_test.rb +44 -0
  161. data/test/html/parser/parse_with_simple_tree_test.rb +40 -0
  162. data/test/html/parser/parse_with_style_tag_test.rb +22 -0
  163. data/test/html/parser/parse_with_text_test.rb +45 -0
  164. data/test/html/parser_test.rb +52 -0
  165. data/test/html/query_test.rb +52 -0
  166. data/test/html/rule/check_block_level_element_test.rb +52 -0
  167. data/test/html/rule/check_button_test.rb +45 -0
  168. data/test/html/rule/check_class_count_test.rb +36 -0
  169. data/test/html/rule/check_css_in_head_test.rb +53 -0
  170. data/test/html/rule/check_dtd_test.rb +46 -0
  171. data/test/html/rule/check_form_element_name_test.rb +49 -0
  172. data/test/html/rule/check_head_contain_meta_and_title_test.rb +52 -0
  173. data/test/html/rule/check_html_template_test.rb +103 -0
  174. data/test/html/rule/check_hyperlink_with_target_test.rb +40 -0
  175. data/test/html/rule/check_hyperlink_with_title_test.rb +43 -0
  176. data/test/html/rule/check_id_n_class_downcase_test.rb +40 -0
  177. data/test/html/rule/check_img_with_alt_prop_test.rb +33 -0
  178. data/test/html/rule/check_no_import_css_test.rb +36 -0
  179. data/test/html/rule/check_prop_have_value_test.rb +32 -0
  180. data/test/html/rule/check_prop_seperator_test.rb +32 -0
  181. data/test/html/rule/check_style_prop_test.rb +30 -0
  182. data/test/html/rule/check_tag_closed_test.rb +59 -0
  183. data/test/html/rule/check_tag_downcase_test.rb +51 -0
  184. data/test/html/rule/check_unescape_char_test.rb +35 -0
  185. data/test/html/rule/check_unique_import_test.rb +56 -0
  186. data/test/html/rule_test.rb +62 -0
  187. data/test/js/expr/expr.rb +57 -0
  188. data/test/js/expr/left_hand.rb +25 -0
  189. data/test/js/expr/operate.rb +145 -0
  190. data/test/js/expr/primary.rb +89 -0
  191. data/test/js/parser_test.rb +98 -0
  192. data/test/js/rule/alert_check_test.rb +37 -0
  193. data/test/js/rule/all_test.rb +23 -0
  194. data/test/js/rule/base_test.rb +34 -0
  195. data/test/js/rule/file_checker_test.rb +131 -0
  196. data/test/js/rule/jq_check_test.rb +90 -0
  197. data/test/js/rule/nest_try_catch_test.rb +71 -0
  198. data/test/js/rule/new_object_and_new_array_test.rb +38 -0
  199. data/test/js/rule/no_eval_test.rb +34 -0
  200. data/test/js/rule/no_global_test.rb +88 -0
  201. data/test/js/rule/private_method_check_test.rb +58 -0
  202. data/test/js/rule/semicolon_test.rb +63 -0
  203. data/test/js/rule/stat_if_with_brace_test.rb +68 -0
  204. data/test/js/rule/stat_if_with_muti_else_test.rb +68 -0
  205. data/test/js/rule/use_strict_equal_test.rb +44 -0
  206. data/test/js/rule_test.rb +47 -0
  207. data/test/js/stat/if.rb +26 -0
  208. data/test/js/stat/iter.rb +115 -0
  209. data/test/js/stat/stat.rb +91 -0
  210. data/test/js/stat/switch.rb +37 -0
  211. data/test/js/stat/try.rb +32 -0
  212. data/test/js/stat/var.rb +38 -0
  213. data/test/parser_visitable_test.rb +102 -0
  214. data/test/position_info_test.rb +66 -0
  215. data/test/rule_dsl/dsl_basic_test.rb +91 -0
  216. data/test/rule_dsl/importing_test.rb +48 -0
  217. data/test/runner/log_level_test.rb +58 -0
  218. metadata +317 -0
data/lib/css/reader.rb ADDED
@@ -0,0 +1,30 @@
1
+ require_relative '../encoding_error'
2
+ require_relative '../helper/file_reader'
3
+
4
module XRay
  module CSS

    # Reads a CSS file and cross-checks the encoding detected while reading
    # against the encoding the file declares through an `@charset` rule.
    class Reader

      include XRay::Helper

      # Read +file+ and return its source text.
      #
      # file - path of the CSS file to read.
      # opt  - accepted for interface compatibility; not used here.
      #
      # Returns the text produced by FileReader.readfile.
      # Raises EncodingError when the first line declares an @charset whose
      # name differs from the encoding reported by FileReader.readfile.
      def self.read( file, opt = {} )
        source, enc = FileReader::readfile(file)
        declare = get_encoding_declaration(file)
        # Charset names are case-insensitive ("UTF-8" and "utf-8" name the
        # same encoding), so compare the two names without regard to case.
        if declare && declare.downcase != enc.to_s.downcase
          raise EncodingError.new
        end
        source
      end

      # Extract the charset name from an `@charset "...";` rule on the first
      # line of +file+.
      #
      # Returns the declared name, or nil when the first line has no such
      # rule. Any StandardError raised while reading or matching (for example
      # a missing or empty file) is deliberately treated as "no declaration".
      def self.get_encoding_declaration( file )
        File.open(file, &:readline)[/@charset\s+(['"])(.*?)\1/, 2]
      rescue
        nil
      end
    end

  end

end
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+ require_relative '../../log_entry'
3
+
4
module XRay
  module CSS
    module Rule

      # File-level check that a CSS file has a minified "-min.css" sibling
      # in the same folder.
      class CompressionChecker

        # opt - options hash; a shallow copy is stored in @opt.
        def initialize( opt={} )
          @opt = opt.dup
        end

        # Run every file-level check against the file called +name+.
        #
        # Returns an Array with the entries produced by the checks
        # (empty when nothing is reported).
        def check_file( name )
          check_items([:has_minified_in_same_folder], name)
        end

        # Report an error unless +name+ is itself a min/merge file or a
        # "-min.css" counterpart exists next to it.
        #
        # Returns a one-element Array holding the LogEntry, or nil when the
        # file passes.
        def check_has_minified_in_same_folder( name )
          return if is_min_file?(name)
          return if is_merge_file?(name)
          return if File.exist?( name.sub(/\.css$/,'-min.css') )

          [LogEntry.new('发布上线的文件需要压缩,命名规则如a.js->a-min.js,且两者在同一目录下', :error)]
        end

        protected

        # Dispatch to check_<item> for each item and gather the Array
        # results; non-Array results (such as nil) are ignored.
        def check_items( items, name )
          items.each_with_object([]) do |item, collected|
            outcome = self.send(:"check_#{item}", name)
            collected.concat(outcome) if outcome.is_a?(Array)
          end
        end

        # Match index (truthy) when +name+ ends with "merge.css", else nil.
        def is_merge_file?( name )
          name =~ /merge\.css$/
        end

        # Match index (truthy) when +name+ ends with "min.css", else nil.
        def is_min_file?( name )
          name =~ /min\.css$/
        end

      end

    end
  end
end
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../../rule'
4
+ require_relative '../../context'
5
+
6
module XRay
  module CSS
    module Rule

      # Visitor that forwards each visited CSS node to the matching
      # check_* method. The check_* methods are not defined in this class;
      # presumably they come from the included ::XRay::Rule / Context
      # modules -- confirm against lib/rule.rb and lib/context.rb.
      class CheckListRule

        # Options hash handed to the constructor.
        attr_reader :options

        include ::XRay::Rule, Context

        # options - configuration hash, stored as-is.
        def initialize(options = {})
          @options = options
        end

        # Check a simple selector node.
        def visit_simple_selector(selector)
          check_css_selector(selector)
        end

        # Check a declaration node.
        def visit_declaration(dec)
          check_declaration(dec)
        end

        # Check a ruleset node.
        def visit_ruleset(ruleset)
          check_css_ruleset(ruleset)
        end

        # Check a property node.
        def visit_property(property)
          check_css_property(property)
        end

        # Check a value node.
        def visit_value(value)
          check_css_value(value)
        end

      end

    end
  end
end
data/lib/css/struct.rb ADDED
@@ -0,0 +1,111 @@
1
+ require_relative '../node'
2
+
3
module XRay
  module CSS
    Node = XRay::Node

    # Root of a parsed stylesheet: an ordered list of statements, each a
    # Directive or a RuleSet.
    class StyleSheet < Node
      attr_reader :statements

      # statements - Array of Directive / RuleSet nodes.
      def initialize(statements)
        @statements = statements
      end

      # Text of every ruleset, one per line (directives are not included).
      def text
        rulesets.map(&:text).join("\n")
      end

      # Position of the first ruleset, or nil when there is none.
      def position
        leading = rulesets.first
        leading && leading.position
      end

      # Statements that are Directive nodes.
      def directives
        statements.select { |stmt| stmt.is_a?(Directive) }
      end

      # Statements that are RuleSet nodes.
      def rulesets
        statements.select { |stmt| stmt.is_a?(RuleSet) }
      end

      alias_method :at_rules, :directives
    end

    # An at-rule: keyword, optional expression and optional block.
    class Directive < Node
      attr_reader :keyword, :expression, :block

      def initialize(keyword, expression, block = nil)
        @keyword    = keyword
        @expression = expression
        @block      = block
      end

      # "@keyword" followed directly by the expression (when present), then
      # either the braced block or a terminating ";".
      def text
        head = "@#{keyword}"
        head += "#{expression}" if expression
        tail = block ? "{\n#{block}\n}\n" : ';'
        head + tail
      end

      # Position of the keyword node.
      def position
        keyword.position
      end
    end

    # A selector together with its list of declarations.
    class RuleSet < Node
      attr_reader :selector, :declarations

      def initialize(selector, declarations)
        @selector     = selector
        @declarations = declarations
      end

      # Render as "selector {", one four-space-indented "declaration;" per
      # line, then "}". Nodes are interpolated through their to_s.
      def text
        indent = ' ' * 4
        lines = declarations.map { |dec| "#{indent}#{dec};" }

        "#{selector} {\n#{lines.join("\n")}\n}"
      end

      # Position of the selector node.
      def position
        selector.position
      end
    end

    # A comma-separated group of simple selectors.
    class Selector < Node
      attr_reader :simple_selectors

      def initialize(simple_selectors)
        @simple_selectors = simple_selectors
      end

      # Simple selector texts joined with ", ".
      def text
        parts = @simple_selectors.map(&:text)
        parts.join(', ')
      end

      # Position of the first simple selector, or nil when the list is empty.
      def position
        return nil if simple_selectors.empty?
        simple_selectors[0].position
      end
    end

    # A single "property: value" pair.
    class Declaration < Node
      attr_reader :property, :value

      def initialize(property, value)
        @property = property
        @value    = value
      end

      # Render as "property: value".
      def text
        "#{property}: #{value}"
      end

      # Position of the property node.
      def position
        property.position
      end
    end

  end
end
@@ -0,0 +1,6 @@
1
module XRay

  # Error type for encoding problems; a RuntimeError subclass that adds no
  # behaviour of its own.
  class EncodingError < RuntimeError; end

end
@@ -0,0 +1,38 @@
1
module XRay

  # Runs a list of validators against a file and gathers what they report.
  class FileValidator
    # options - stored in @options; not read anywhere in this class.
    def initialize( options )
      @options    = options
      @validators = []
    end

    # Register one validator. Returns the internal validator list.
    def add_validator( val )
      @validators.push(val)
    end

    # Register every validator in +vals+. Returns +vals+.
    def add_validators( vals)
      vals.each { |entry| add_validator(entry) }
    end

    # Call check_file(file) on each validator that responds to it.
    #
    # Array results are appended element by element, any other truthy
    # result is appended as a single item, nil/false results are skipped.
    #
    # Returns the Array of collected results.
    def check( file )
      @validators.each_with_object([]) do |validator, results|
        next unless validator.respond_to?(:check_file)

        outcome = validator.check_file(file)
        next unless outcome

        if outcome.is_a?(Array)
          results.concat(outcome)
        else
          results << outcome
        end
      end
    end

    alias_method :validate, :check

  end

end
@@ -0,0 +1,50 @@
1
# Heuristics for deciding whether a piece of code is CSS, JS or HTML.
class CodeType

  class << self

    # Guess the code type, by file name when a non-empty one is given and
    # by content otherwise.
    #
    # Returns :css, :js or :html.
    def guess(text, filename=nil)
      named = filename && !filename.empty?
      named ? guess_by_name(filename) : guess_by_content(text)
    end

    # Guess from the file extension; anything that is not .css or .js is
    # treated as HTML.
    def guess_by_name( filename )
      ext = File.extname( filename )
      if ext =~ /\.css$/i
        :css
      elsif ext =~ /\.js$/i
        :js
      else
        :html #TODO: support more suffix
      end
    end

    # Guess from the text: HTML is tried first, then CSS, and JS is the
    # fallback.
    def guess_by_content(text)
      if is_html?(text)
        :html
      elsif is_css?(text)
        :css
      else
        :js #TODO: support more code syntaxes
      end
    end

    # Match index (truthy) when the extension ends in css, js, htm or
    # html (case-insensitive); nil otherwise.
    def is_style_file?(filename)
      File.extname( filename ) =~ /(css|js|html?)$/i
    end

    # 'lib' when the path contains a "lib" segment enclosed by slashes or
    # backslashes, otherwise 'page'.
    def scope(filename)
      if filename =~ /[\\\/]lib[\\\/]/ then 'lib' else 'page' end
    end

    private

    # Truthy when some line of +text+ starts (after optional whitespace)
    # with "<".
    def is_html?(text)
      /^\s*</m =~ text
    end

    # Truthy when some line starts with "@" or with selector-like tokens
    # followed by "{".
    def is_css?(text)
      (/^\s*@/m =~ text) || (/^\s*([-\*:\.#_\w]+\s*)+\{/ =~ text)
    end

  end

end
@@ -0,0 +1,44 @@
1
# A string paired with an optional foreground and background colour index,
# rendered with ANSI escape sequences. Also adds <color> / <color>_bg
# helper methods to String, e.g. "text".red.blue_bg.
class ColorString

  # Colour names; a name's position in this list is its ANSI colour digit.
  @@colors = %w(black red green yellow blue magenta cyan white)

  # The list of supported colour names.
  def self.colors
    @@colors
  end

  # str   - the text to wrap.
  # color - foreground colour index (0-7) or nil for none.
  # bg    - background colour index (0-7) or nil for none.
  def initialize(str, color=nil, bg=nil)
    @str, @color, @bg = str, color, bg
  end

  # The plain text when no colour is set; otherwise the text wrapped in
  # "\e[3<color>;4<bg>m" ... "\e[0m".
  def to_s
    return @str if @color.nil? and @bg.nil?
    s = []
    s << "3#{@color}" unless @color.nil?
    s << "4#{@bg}" unless @bg.nil?
    "\e[#{s.join(';')}m" << @str << "\e[0m"
  end

  def inspect
    "#<ColorString \"#{@str}\", color:#{@color}, bg:#{@bg}>"
  end

  # Delegate String's public instance methods to the rendered string.
  # NOTE: `self` here is the ColorString class, so a method is skipped when
  # the *class object* responds to it (to_s, inspect, ==, ...).
  String.public_instance_methods.each do |m|
    unless self.respond_to? m
      define_method(m) { |*arg| to_s.send( m, *arg ) }
    end
  end

  @@colors.each_with_index do |color, i|
    # Build the background method name as a new string. Appending to
    # `color` in place (`color << '_bg'`) would rename the entry inside
    # @@colors and make the ColorString methods below get the wrong names.
    bg_name = "#{color}_bg"

    # "text".red / "text".red_bg start a ColorString.
    String.class_eval do
      define_method(color) { ColorString.new(self, i) }
      define_method(bg_name) { ColorString.new(self, nil, i) }
    end

    # colored.red / colored.red_bg set the colour and return self so the
    # calls can be chained.
    ColorString.class_eval do
      define_method(color) { @color = i; self }
      define_method(bg_name) { @bg = i; self }
    end

  end
end
44
+
@@ -0,0 +1,22 @@
1
+ require_relative 'strenc'
2
+
3
module XRay
  module Helper
    # File reading helper; `extend self` makes readfile callable directly
    # on the module as well as from includers.
    module FileReader
      extend self

      # Read +path+ and convert its content with String#utf8!.
      #
      # path - file to read.
      # opt  - accepted but not used here.
      #
      # Returns [text, encoding], where encoding is the string's
      # former_enc, or 'ascii-8bit' when that is not set.
      # Raises ArgumentError when the file is not readable.
      def readfile(path, opt={})
        raise ArgumentError.new("File is not readable!") unless File.readable?(path)

        content = File.read(path).utf8!
        [content, content.former_enc || 'ascii-8bit']
      end

    end
  end
end
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+
3
# Encoding-detection helpers added to the core String class.
class String

  # Candidate source encodings, tried in this order.
  @@encs = %w(ascii utf-8 gb18030 gbk gb2312 cp936 big5)

  class << self
    # Current list of candidate encodings.
    def encs
      @@encs
    end

    # Replace the list of candidate encodings.
    def encs=(arr)
      @@encs = arr
    end
  end

  if public_instance_methods.include? :encode!
    # Re-encode the receiver in place from +from+ to +to+.
    # Returns whether the result is validly encoded.
    def try_convert to, from
      encode!( to, from ).force_encoding( to )
      valid_encoding?
    end
  else
    # for ruby 1.8
    require 'iconv'
    def try_convert to, from
      text = Iconv.new(to, from).iconv(self)
      replace(text) if self =~ /./
    end
  end

  # Encoding name recorded by the last successful enc! call
  # (nil when none was recorded).
  attr_reader :former_enc

  # Bring the receiver to +encoding+, in place.
  #
  # First the bytes are simply tagged as +encoding+; if they are valid,
  # former_enc is set to +encoding+. Otherwise each candidate in the
  # encoding list is tried as the source of a conversion, and the first
  # one that works is recorded in former_enc. Conversion errors are
  # swallowed and the next candidate is tried.
  #
  # Returns self.
  def enc!(encoding)

    if respond_to?(:force_encoding)
      force_encoding(encoding)
      if valid_encoding?
        @former_enc = encoding
        return self
      end
    end

    source = @@encs.find do |candidate|
      begin
        send(:try_convert, encoding, candidate)
      rescue
        false
      end
    end
    @former_enc = source if source
    self
  end

  # Define one public shortcut per candidate encoding, named after it with
  # dashes removed: ascii!, utf8!, gb18030!, gbk!, gb2312!, cp936!, big5!.
  @@encs.each do |tar|
    shortcut = "#{tar.delete('-')}!"
    define_method(shortcut) { enc!(tar) }
    public shortcut
  end

  private :try_convert

end
61
+
62
+
63
# Manual check when this file is run directly: print a fixture as UTF-8.
puts File.read('test/fixtures/html/1-1.html').utf8! if __FILE__ == $0
@@ -0,0 +1,212 @@
1
+ require_relative '../base_parser'
2
+ require_relative '../css/parser'
3
+ require_relative '../js/parser'
4
+ require_relative 'struct'
5
+
6
module XRay; module HTML

  # Recursive-descent HTML parser driven by @scanner. The helpers scan,
  # skip, skip_empty, batch, parse_error, parse_warn and scanner_pos are
  # not defined here; presumably they are inherited from BaseParser --
  # confirm against lib/base_parser.rb.
  class Parser < BaseParser

    # Parse +src+, yield the result when a block is given, and return it.
    def self.parse(src, &block)
      doc = self.new(src).parse
      yield doc if block_given?
      doc
    end

    TEXT = /[^<]+/m
    PROP_NAME = %r/\w[-:\w]*/m
    PROP_VALUE = %r/'([^']*)'|"([^"]*)"|([^\s>]+)/m
    PROP = %r/#{PROP_NAME}\s*(?:=\s*#{PROP_VALUE})?/m
    TAG_NAME = /\w[^>\(\)\/\s]*/
    TAG_START = %r/<(#{TAG_NAME})/m
    TAG_END = %r/<\/#{TAG_NAME}\s*>/m
    TAG = %r/#{TAG_START}(\s+#{PROP})*\s*>/m
    SELF_CLOSE_TAG = %r/#{TAG_START}(\s+#{PROP})*\s*\/>/m
    DTD = /\s*<!(doctype)\s+(.*?)>/im
    COMMENT = /<!--(.*?)-->/m

    # Entry point; parses the whole document.
    def parse
      parse_doc
    end

    # Parse all top-level elements.
    #
    # Returns nil for no elements, the element itself when there is exactly
    # one, and a Document wrapping the list otherwise.
    def parse_doc
      nodes = batch(:parse_element)
      return nil if nodes.size == 0
      return nodes[0] if nodes.size == 1
      ::XRay::HTML::Document.new( nodes )
    end

    # Parse whatever comes next: a doctype (accepted only once), a comment,
    # a tag or text. Anything else is reported through parse_error.
    def parse_element
      if @scanner.check(DTD) && !@dtd_checked
        @dtd_checked = true
        parse_dtd
      elsif @scanner.check(COMMENT)
        parse_comment
      elsif @scanner.check(TAG_START)
        parse_tag
      elsif !text_end?
        parse_text
      else
        parse_error('Invalid HTML struct')
      end
    end

    # Consume a doctype and build a DTDElement from its two captures.
    def parse_dtd
      node = scan(DTD)
      DTDElement.new(@scanner[2], @scanner[1], node.position)
    end

    # Consume a comment and build a CommentElement from its inner text.
    def parse_comment
      scan(COMMENT)
      CommentElement.new(@scanner[1])
    end

    # Accumulate text until text_end? holds. A "<" that does not end the
    # text is kept as part of it, and a warning is issued whenever the
    # accumulated text contains "<" or ">".
    def parse_text
      text = ''
      until text_end?
        text << '<' if @scanner.skip(/</)
        text << @scanner.scan(TEXT).to_s

        # TODO: make this detection a rule
        parse_warn "'#{$~}' not escaped" if text =~ /<|>/
      end
      TextElement.new(text)
    end

    # Dispatch on the kind of tag at the scan position.
    def parse_tag
      if @scanner.check(DTD)
        parse_dtd_tag
      elsif @scanner.check(SELF_CLOSE_TAG)
        parse_self_ending_tag
      elsif @scanner.check(TAG)
        parse_normal_tag
      else
        parse_error('Invalid HTML struct')
      end
    end

    # Parse properties until the tag end (">" or "/>") or end of input.
    def parse_properties
      skip_empty
      collected = []
      until prop_search_done?
        prop = parse_property
        collected << prop if prop
        skip_empty
      end
      collected
    end

    # Parse one property. Value and separator stay nil when no "=" follows
    # the name; the separator is the opening quote, or nil when the value
    # is unquoted.
    def parse_property
      name = parse_prop_name
      value = sep = nil
      if @scanner.check( /\s*=/ )
        skip(/[=]/)
        sep = @scanner.check(/['"]/)
        value = parse_prop_value
      end
      Property.new(name, value, sep)
    end

    def parse_prop_name
      scan(PROP_NAME)
    end

    # Consume a property value and return it without its quotes (exactly
    # one of the three PROP_VALUE captures is set).
    def parse_prop_value
      scan(PROP_VALUE)
      "#{@scanner[1]}#{@scanner[2]}#{@scanner[3]}"
    end

    protected

    # Truthy at the end of a tag (">" or "/>") or at end of input.
    def prop_search_done?
      @scanner.check(/\/>|>/) || @scanner.eos?
    end

    # Parse "<tag props>children</tag>". Auto-close tags that are not
    # immediately followed by their end tag are closed with no children.
    def parse_normal_tag
      skip(/</)
      tag = scan(TAG_NAME)
      prop = parse_properties
      @parsing_script = tag =~ /^script$/i
      skip(/>/)

      scopes << tag.text

      children = []
      ending = nil
      begin
        # The tag name is interpolated unescaped, so building the pattern
        # can fail; that is reported as a ParseError.
        end_tag = %r(<#{tag.text.sub(/^(?!=\/)/, '\/')}>)i
      rescue
        raise ::XRay::ParseError.new("invalid tag name: #{tag.text}", scanner_pos)
      end

      if auto_close?(tag.text) && !@scanner.check(end_tag)
        close_type = :none
      else
        until @scanner.check(TAG_END) || @scanner.eos?
          child = parse_element
          children << child if child
        end
        begin
          ending = scan(end_tag).text
          close_type = :after
        rescue
          close_type = :none
          raise
        end
      end
      @parsing_script = false

      scopes.pop

      element = Element.new(tag, prop, children, close_type, ending)
      element.scopes = scopes.dup
      element
    end

    # Stack of names of the tags currently open.
    def scopes
      @scopes ||= []
    end

    def parse_dtd_tag
      scan(DTD)
    end

    # Parse "<tag props/>" into a childless Element of close type :self.
    def parse_self_ending_tag
      skip(/</)
      tag = scan(TAG_NAME)
      prop = parse_properties
      skip(/\/>/)
      element = Element.new(tag, prop, [], :self)
      element.scopes = scopes.dup
      element
    end

    # Whether +tag+ (compared in lower case) is listed in AUTO_CLOSE_TAGS.
    def auto_close?(tag)
      XRay::HTML::AUTO_CLOSE_TAGS.include?(tag.to_s.downcase)
    end

    # Truthy when text parsing must stop: at end of input, at "</script>"
    # while inside a script tag, or otherwise at any tag, self-closing
    # tag, end tag or comment.
    def text_end?
      return true if @scanner.eos?

      stopper = if @parsing_script
        /<\/script\s*>/
      else
        %r(#{TAG}|#{SELF_CLOSE_TAG}|#{TAG_END}|#{COMMENT})
      end
      @scanner.check(stopper)
    end

  end


end; end
204
+
205
# Manual smoke run when this file is executed directly: parse a few
# snippets and print the resulting markup.
if __FILE__ == $0
  show = proc { |e| puts e.outer_html }

  XRay::HTML::Parser.parse(%q(<div class="info" checked>information</div>), &show)
  XRay::HTML::Parser.parse(%q(<img width="100" height='150' id=img > <center>text</center>)) { |e| puts e.first.outer_html }
  XRay::HTML::Parser.parse(%q(<center><div><div><center>text</center></div></div></center>), &show)
  XRay::HTML::Parser.parse(%q(<center ns:name="value"><div><div><center>text</center></div></div></center>), &show)

  # Mis-nested tags: any error raised here is ignored.
  begin
    XRay::HTML::Parser.parse(%q(<center><div></center></div>), &show)
  rescue
  end

  XRay::HTML::Parser.parse('<br/>', &show)
end