metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,192 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+ #
6
+ # parses the PPC specification PDF to generate the opcode list
7
+ #
8
+
9
+ require 'pdfparse'
10
+
11
# State shared by make_instr, make_alias and epilog:
#  $field_mask  - field symbol => bit mask of the field
#  $field_shift - field symbol => left shift of the field in the opcode
#  $opcodes     - list of [name, binary value, argument list]
$field_mask, $field_shift = {}, {}
$opcodes = []
14
# Turns one instruction encoding row into opcode definitions.
#
# bins:: array of strings, one per bitfield: a decimal constant, a field
#        name made of letters, or '/', '//', '///' for reserved bits
# bits:: array of integers, the first bit index of each bitfield
#        (index 0 is the most significant bit of the 32-bit word)
# text:: array of syntax strings, e.g. 'fnabs FRT,FRB (Rc=0)'
#
# For every syntax string, appends [opname, binary, args] to $opcodes and
# prints the corresponding "addop" line on stdout. Fields seen for the
# first time are recorded in $field_mask / $field_shift.
# The strings in +text+ are never modified.
# Raises RuntimeError when a syntax string cannot be mapped to the fields.
def make_instr(bins, bits, text)
  # calc bitfields length from their offset: a field runs up to the start
  # of the next one, the last field ends at bit 32
  last = 32
  bitlen = []
  bits.reverse_each { |bit|
    bitlen.unshift(last - bit)
    last = bit
  }

  # the opcode binary value (w/o fields)
  bin = 0
  fields = []

  # parse the data
  bins.zip(bits, bitlen).each { |val, off, len|
    # bit indexes count from the MSB: convert to a left shift count
    off = 32 - (off + len)
    msk = (1 << len) - 1
    case val
    when '/', '//', '///' # reserved field, value unspecified
    when /^\d+$/ # constant field
      bin |= val.to_i << off
    when /^[A-Za-z]+$/
      fld = val.downcase.to_sym
      # name already known with another mask/shift: append '_' until free
      fld = "#{fld}_".to_sym while $field_mask[fld] and ($field_mask[fld] != msk or $field_shift[fld] != off)
      fields << fld
      $field_mask[fld] ||= msk
      $field_shift[fld] ||= off
    end
  }

  # displacement field => pseudo argument used for 'disp(ra)' operands
  memops = [[:d, :ra_i16], [:ds, :ra_i16s], [:dq, :ra_i16q]]

  text.each { |txt|
    # fnabs FRT,FRB (Rc=0)
    curbin = bin
    curfields = fields.dup
    # field lookup ignoring the '_' suffixes added above
    by_name = lambda { |name| curfields.find { |f| f.to_s.delete('_') == name } }

    # typo: fdiv. has no '(' ; sub (not sub!) keeps the caller's string intact
    txt = txt.sub(' Rc=1)', ' (Rc=1)') if txt.include? 'fdiv.'
    if txt =~ /(.*\S)\s*\((\w+=.*)\)/
      txt, forced = $1, $2
      # '(OE=0 Rc=1)': fields whose value is fixed for this mnemonic
      forced.split.each { |e|
        raise e if e !~ /(\w+)=(\d+)/
        name, val = $1.downcase, $2.to_i
        fld = by_name[name]
        raise "bad bit #{name} in #{txt}" if not fld
        curfields.delete fld
        curbin |= val << $field_shift[fld]
      }
    end

    opname, args = txt.split(/\s+/, 2)
    args = args.to_s.downcase.split(/\s*,\s*/).map { |arg|
      fld = by_name[arg]
      curfields.delete fld
      fld
    }

    # one unresolved argument: either a 'disp(ra)' operand using the two
    # remaining fields, or the single remaining field
    if args.include? nil
      memop = memops.find { |disp, _| curfields.length == 2 and (curfields - [:ra, disp]).empty? }
      if memop
        args[args.index(nil)] = memop[1]
        curfields.clear
      elsif curfields.length == 1
        args[args.index(nil)] = curfields.shift
      end
    end
    raise "bad args #{args.inspect} (#{curfields.inspect}) in #{txt}" if args.include? nil
    $opcodes << [opname, curbin, args]

    n = (opname.inspect << ',').ljust(10) + '0x%08X' % curbin
    n << ', ' if not args.empty?
    puts "\taddop " + n + args.map { |e| e.inspect }.join(', ')
  }
end
80
+
81
+ # handle instruction aliases
82
+ # NOT WORKING
83
+ # should be implemented in the parser/displayer instead of opcode list
84
+ # manual work needed for e.g. conditional jumps
85
# Reports an extended mnemonic (alias) of an already known opcode.
#
# newop::   alias mnemonic
# newargs:: array of argument strings of the alias
# oldop::   mnemonic of the aliased instruction, looked up in $opcodes
#           (most recently added definition first)
# oldargs:: array of argument strings of the aliased instruction; numeric
#           literals (decimal or 0x-prefixed hex) are folded into a copy
#           of the opcode binary value
#
# The patched copy is currently not stored anywhere; only a commented
# "alias" line is printed on stdout.
# Raises RuntimeError if +oldop+ is unknown.
def make_alias(newop, newargs, oldop, oldargs)
  op = $opcodes.reverse_each.find { |op_| op_[0] == oldop }
  raise "unknown alias #{newop} => #{oldop}" if not op
  op2 = op.dup
  op2[0] = newop
  oldargs.each_with_index { |oa, i|
    # XXX bcctr 4, 6 -> bcctr 4, 6, 0 => not the work
    val = case oa
          when /^[0-9]+$/ then oa.to_i # to_i: '08' is decimal 8, not bad octal
          when /^0x[0-9a-f]+$/i then oa.hex
          end
    next if not val
    # a literal may have no matching field (more literals than operands)
    shift = $field_shift[op[2][i]]
    next if not shift
    op2[1] |= val << shift
  }
  puts "#\talias #{newop} #{newargs.join(', ')} -> #{oldop} #{oldargs.join(', ')}".downcase
end
98
+
99
+ require 'enumerator'
100
# Prints the @field_shift and @field_mask hash definitions on stdout,
# built from $field_shift and $field_mask, sorted by field name, six
# entries per line. Mask values above 1000 are printed in hexadecimal.
def epilog
  puts "\n\t@field_shift = {"
  # each_slice replaces enum_slice, which no longer exists since Ruby 1.9
  puts $field_shift.sort_by { |k, v| k.to_s }.each_slice(6).map { |slc|
    "\t\t" + slc.map { |k, v| "#{k.inspect} => #{v}" }.join(', ')
  }.join(",\n")
  puts "\t}"
  puts "\n\t@field_mask = {"
  puts $field_mask.sort_by { |k, v| k.to_s }.each_slice(6).map { |slc|
    "\t\t" + slc.map { |k, v| "#{k.inspect} => #{v > 1000 ? '0x%X' % v : v}" }.join(', ')
  }.join(",\n")
  puts "\t}"
end
112
+
113
# true once an encoding table was found for the instruction description
# being read, reset when its 'Special Registers Altered' line is reached
$foundop = false

# Scans the text lines of one page column for instruction definitions.
#
# lines:: array of objects responding to str, x, y, fontx and fonty.
#         Fragments sharing the same y are merged into the first one
#         (its str is extended in place).
#
# Each encoding table found (a small-font '0 ... 31' bit index line
# preceded by the encoding line and the syntax lines) is passed to
# make_instr; lines following an 'Extended: Equivalent to:' header are
# passed to make_alias. Columns without any of the marker sentences are
# ignored.
# Raises RuntimeError 'nofoundop' (after dumping the buffered lines) when a
# 'Special Registers Altered' line is reached without an encoding table.
def parse_page(lines)
  # all instr defining pages include this
  return unless lines.find { |l| l.str =~ /Special Registers Altered|Memory Barrier Instructions|Data Cache Instructions/ } # sync L/dcbt

  ilist = [] # line buffer
  extended = false

  # concat lines with same y
  lines = lines.sort_by { |l| [-l.y, l.x] }
  lastline = nil
  lines.delete_if { |l|
    if lastline and lastline.y == l.y and ([lastline.fontx, lastline.fonty] == [l.fontx, l.fonty] or l.str =~ /^\s*$/)
      lastline.str << ' ' << l.str
      true
    else
      lastline = l
      false
    end
  }

  lines.each { |l|
    prev = ilist.last
    # search for the bit indices list: it has as many entries as the
    # buffered encoding line, or one more (the trailing 31).
    # Without a buffered line it cannot be an encoding table.
    if l.fonty < 7 and l.str =~ /^0 [\d ]+ 31\s*$/ and prev and
       [0, 1].include?(l.str.split.length - prev.str.split.length)
      $foundop = true
      bitindices = l.str.split.map { |i| i.to_i }
      # previous line is the binary encoding
      encoding = ilist.pop.str.split
      bitindices.pop if encoding.length < bitindices.length
      # previous line is the instruction text format
      ilist.pop if ilist.last and ilist.last.str =~ /\[POWER2? mnemonics?: (.*)\]/
      # collect the syntax lines: at least one, then as long as they hold
      # a ',' or end with ')'
      text = []
      while (cand = ilist.pop)
        str = cand.str
        break unless str and (str =~ /,|\)$/ or text.empty?)
        text.unshift str
      end
      ilist = []
      make_instr(encoding, bitindices, text)
    elsif l.str.include? 'Special Registers Altered'
      if not $foundop
        puts ilist.map { |l_| "(#{l_.y}) #{l_.str}" }
        puts lines.map { |l_| "(#{l_.y}) #{l_.str}" } if ilist.empty?
        raise 'nofoundop'
      else
        $foundop = false
      end
    elsif l.str =~ /Extended:\s+Equivalent to:/
      extended = true
    elsif extended
      # 'newop newargs oldop oldargs' rows, until the first other line
      if l.str.include? ',' and l.str =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(.*)/ and $opcodes.find { |op| op[0] == $3 }
        newop, newargs, exop, exargs = $1, $2, $3, $4
        make_alias(newop, newargs.split(','), exop, exargs.split(','))
      else
        extended = false
      end
    else
      ilist << l
    end
  }
end
168
+
169
+ # PowerPC Architecture v2.02:
170
+ # 1 - User Instruction Set
171
+ # 2 - Virtual Environment
172
+ # 3 - Operating Environment
173
# Walk every page of every spec book found in the current directory,
# then dump the collected field tables.
Dir['PPC_Vers202_Book?_public.pdf'].sort.each do |book|
  $stderr.puts book if $stderr.tty?
  pdf = PDF.read book
  pagecount = pdf.trailer['Root']['Pages']['Count'] || 0
  curpage = 0
  pdf.each_page do |page|
    # progress counter, only when watched from a terminal
    if $stderr.tty?
      curpage += 1
      $stderr.print "#{curpage}/#{pagecount} \r"
    end
    page.clip_lines(50, 740)

    # split columns: each one is parsed on its own
    left_col, right_col = page.lines.flatten.partition { |l| l.x < 288 }
    parse_page(left_col)
    parse_page(right_col)
  end
  $stderr.print " \r" if $stderr.tty?
end

epilog
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/ruby
2
+ # This file is part of Metasm, the Ruby assembly manipulation suite
3
+ # Copyright (C) 2006-2009 Yoann GUILLOT
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+
8
+ # this is a TCP proxy which dumps the transmitted data in hex on stdout
9
+ # usage: tcp_proxy <lhost> <lport> <rhost> <rport> [<timeout_s>]
10
+
11
+ require 'socket'
12
+ require File.join(File.dirname(__FILE__), 'hexdump')
13
+
14
# Relays data between two connected sockets until one side closes or
# nothing happens for +timeout+ seconds, dumping everything relayed on
# stdout (a timestamped direction header, then String#hexdump).
#
# clt::     client side socket
# srv::     server side socket
# timeout:: max idle time in seconds (nil: wait forever)
#
# At most 16kB are buffered per direction. Read errors and end of file are
# treated as a closed connection; write errors are raised to the caller.
def bouncepkt(clt, srv, timeout=nil)
  maxbuf = 1024*16
  s2c = ''
  c2s = ''

  # moves all immediately available data from sock to buf (up to maxbuf)
  # returns sock, or false once the peer has closed
  fill = lambda { |sock, buf|
    while sock and buf.length < maxbuf and IO.select([sock], nil, nil, 0)
      begin
        # one call per available chunk instead of one per byte
        buf << sock.readpartial(maxbuf - buf.length)
      rescue IOError, SystemCallError
        sock = false
      end
    end
    sock
  }

  # dumps and sends buf to sock if it is open and ready for writing
  drain = lambda { |sock, buf, header|
    if sock and buf.length > 0 and IO.select(nil, [sock], nil, 0)
      puts Time.now.strftime(header)
      buf.hexdump(:fmt => ['c', 'a'])
      sock.write buf
      buf.replace ''
    end
  }

  loop do
    break if not IO.select([clt, srv], nil, nil, timeout)

    srv = fill[srv, s2c]
    clt = fill[clt, c2s]

    drain[clt, s2c, 's -> c %H:%M:%S']
    drain[srv, c2s, 'c -> s %H:%M:%S']

    break if not clt or not srv
  end
end
54
+
55
+
56
# Command line entry point: listen on <lhost>:<lport> and relay each
# incoming connection, one at a time, to <rhost>:<rport>.
if $0 == __FILE__
  if ARGV.length < 4
    abort "usage: bnc <lhost> <lport> <rhost> <rport> [<timeout_s>]"
  end

  lhost = ARGV.shift
  lport = Integer(ARGV.shift)
  rhost = ARGV.shift
  rport = Integer(ARGV.shift)
  timeout = Float(ARGV.shift) if not ARGV.empty?

  s = TCPServer.new(lhost, lport)

  loop do
    puts "waiting..."
    a = s.accept
    puts "incoming connection"
    c = nil

    begin
      c = TCPSocket.new(rhost, rport)
      bouncepkt(a, c, timeout)
    rescue SystemCallError, SocketError => e
      # errors during the transfer just end the session; an unreachable
      # remote is reported, and the proxy keeps accepting connections
      puts "cannot connect to #{rhost}:#{rport}: #{e.message}" if not c
    ensure
      # always release both sockets, whatever ended the session
      puts "connection closed"
      a.close
      c.close if c
    end
  end
end
@@ -0,0 +1,440 @@
1
+ #!/usr/bin/env ruby
2
+ # This file is part of Metasm, the Ruby assembly manipulation suite
3
+ # Copyright (C) 2006-2009 Yoann GUILLOT
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+ # This script is used to compile the Metasm documentation into html files
8
+ # Loosely inspired by the rst syntax
9
+
10
+ # stuff to generate html code
11
+ module Html
12
+ class Elem
13
+ attr_reader :name, :attrs, :content, :style
14
+
15
+ IndentAdd = ' '
16
+ LineLenMax = 80
17
+
18
+ def initialize(name, attrs=nil, content=nil)
19
+ @name = name
20
+ @attrs = Hash.new
21
+ @style = Hash.new
22
+ attrs.each { |k, v| set_attr(k, v) } if attrs
23
+ if content == false
24
+ @content = Array.new
25
+ @uniq = true
26
+ else
27
+ @content = content ? content : Array.new
28
+ @uniq = false
29
+ end
30
+ self
31
+ end
32
+
33
+ @@quotechars = {
34
+ 'è' => '&egrave;',
35
+ 'ë' => '&euml;',
36
+ '>' => '&gt;',
37
+ '<' => '&lt;',
38
+ '"' => '&quot;',
39
+ '&' => '&amp;',
40
+ }
41
+
42
+ def add(*content)
43
+ content.each { |e|
44
+ if (e.class == Array)
45
+ add(*e)
46
+ next
47
+ end
48
+ if e.class.ancestors.include? Elem
49
+ @content << e
50
+ else
51
+ @content << e.to_s.gsub(Regexp.new("(#{@@quotechars.keys.join('|')})")) { |x| @@quotechars[x] }
52
+ end
53
+ }
54
+ self
55
+ end
56
+ alias << add
57
+
58
+ def add_style(k, v)
59
+ @style[k] = v
60
+ self
61
+ end
62
+
63
+ def set_attr(k, v)
64
+ if k == 'style'
65
+ v.split(/\s*;\s*/).each { |s|
66
+ add_style($1, $2) if s =~ /^\s*(\S+)\s*:\s*(.*?)\s*$/
67
+ }
68
+ else
69
+ @attrs[k]=v
70
+ end
71
+ self
72
+ end
73
+
74
+ def bg(c)
75
+ @style['background'] = c
76
+ self
77
+ end
78
+
79
+ def hclass(c)
80
+ @attrs['class'] = c
81
+ self
82
+ end
83
+
84
+ def length(start=nil)
85
+ # text length on one line w/o indent
86
+ if start
87
+ l = start.length
88
+ else
89
+ # '<name>'
90
+ l = @name.length + 2
91
+ @attrs.each{ |k, v|
92
+ l += " #{k}=\"#{v}\"".length
93
+ }
94
+ # ' style=""' - last '; '
95
+ l += 9-2 unless @style.empty?
96
+ # 'k: v; '
97
+ @style.each{ |k, v|
98
+ l += "#{k}: #{v}; ".length
99
+ }
100
+ # ' /'
101
+ l += 2 if @uniq
102
+ end
103
+ @content.each{ |c|
104
+ l += c.length
105
+ }
106
+ # '</name>'
107
+ l += 3+@name.length unless @uniq
108
+ return l
109
+ end
110
+
111
+ def to_s(indent = '')
112
+ attrs = @attrs.map { |k, v| " #{k}=\"#{v}\"" }.join
113
+ attrs += ' style="' + @style.map{ |k, v| "#{k}: #{v}" }.join('; ') + '"' unless @style.empty?
114
+ s = '' << indent << '<' << @name << attrs << (@uniq ? ' />' : '>')
115
+ if @uniq
116
+ s
117
+ elsif @name == 'pre'
118
+ s << @content.map { |c| c.to_s }.join.chomp << '</pre>'
119
+ else
120
+ if length(s) > LineLenMax
121
+ sindent = indent + IndentAdd
122
+ sep = "\n"
123
+ @content.each { |c|
124
+ case c
125
+ when Elem
126
+ if sep == ''
127
+ s << c.to_s(sindent).sub(/^\s+/, '')
128
+ else
129
+ news = c.to_s(sindent)
130
+ plen = s.length - (s.rindex("\n") || -1) - 1
131
+ plen -= 1 if s[-1, 1] == ' '
132
+ newss = news.sub(/^\s+/, '')
133
+ if not news.include?("\n") and s[-1] != ?> and
134
+ plen + 1 + newss.length <= LineLenMax
135
+ # concat inline tag to previous String
136
+ s << ' ' if s[-1, 1] != ' '
137
+ s << newss
138
+ else
139
+ s << sep if c.name =~ /^h\d$/ and c != @content.first
140
+ s << sep << news
141
+ end
142
+ end
143
+ when String
144
+ cw = c.split(/\s+/)
145
+ if @name == 'p' and c.object_id == @content.first.object_id
146
+ cw.shift if cw[0] == ''
147
+ s << "\n" << sindent
148
+ else
149
+ s << cw.shift.to_s
150
+ end
151
+ plen = s.length - (s.rindex("\n") || -1) - 1
152
+ while w = cw.shift
153
+ plen -= 1 if s[-1, 1] == ' '
154
+ if plen + 1 + w.length > LineLenMax
155
+ s << "\n" << sindent
156
+ plen = sindent.length
157
+ end
158
+ s << ' ' if s[-1, 1] != ' '
159
+ s << w
160
+ plen += w.length+1
161
+ end
162
+ if c !~ /\s+$/
163
+ sep = ''
164
+ next
165
+ end
166
+ else
167
+ s << sep << sindent << c.to_s
168
+ end
169
+ sep = "\n"
170
+ }
171
+ sep = "\n" if @name == 'p'
172
+ sep << indent if sep != ''
173
+ s << sep << "</#@name>"
174
+ else
175
+ s << @content.map { |c| c.to_s }.join << "</#@name>"
176
+ end
177
+ end
178
+ end
179
+
180
+ def inspect
181
+ "<#{@name}"+@content.map{|c|"\n"+c.inspect}.join+"\n/#{@name}>"
182
+ end
183
+ end
184
+
185
+ class Page < Elem
186
+ attr_reader :body, :head
187
+ def initialize
188
+ @body = Elem.new('body')
189
+ @head = Elem.new('head')
190
+ super('html', {'xmlns'=>'http://www.w3.org/1999/xhtml', 'xml:lang'=>'fr'})
191
+ add(@head)
192
+ add(@body)
193
+ end
194
+
195
+ def to_s
196
+ '<?xml version="1.0" encoding="us-ascii" ?>'+"\n"+
197
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"'+"\n"+
198
+ IndentAdd*2+'"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'+"\n"+
199
+ super.to_s
200
+ end
201
+ end
202
+ class Img < Elem
203
+ def initialize(src, alt=nil)
204
+ super('img', {'src'=>src}, false)
205
+ set_attr('alt', alt) if alt
206
+ self
207
+ end
208
+ end
209
+ class A < Elem
210
+ def initialize(href, text)
211
+ super('a', {'href'=>href}, [text])
212
+ end
213
+ end
214
+ class P < Elem
215
+ def initialize(content = nil)
216
+ super('p')
217
+ add(content) if content
218
+ self
219
+ end
220
+ end
221
+ class Div < Elem
222
+ def initialize(hclass = nil)
223
+ super('div')
224
+ hclass(hclass) if hclass
225
+ self
226
+ end
227
+ end
228
+ class Span < Elem
229
+ def initialize(hclass = nil)
230
+ super('span')
231
+ hclass(hclass) if hclass
232
+ self
233
+ end
234
+ end
235
+ class Stylesheet < Elem
236
+ def initialize(href)
237
+ super('link', {'rel'=>'stylesheet', 'type'=>'text/css', 'href'=>href}, false)
238
+ end
239
+ end
240
+ class Br < Elem
241
+ def initialize
242
+ super('br', nil, false)
243
+ end
244
+ end
245
+ class Hr < Elem
246
+ def initialize
247
+ super('hr', nil, false)
248
+ end
249
+ end
250
+
251
+ class List < Elem
252
+ def initialize(*elems)
253
+ super('ul')
254
+ elems.each { |e| add_line(e) }
255
+ end
256
+
257
+ def add_line(line)
258
+ add(Elem.new('li').add(line))
259
+ self
260
+ end
261
+ end
262
+ end
263
+
264
# Compiles one text file (and, recursively, the .txt files it links to)
# into an html file written next to it.
class Txt2Html
  # f:: path of the text file, relative to the current directory.
  #     Files already handled during this run are skipped.
  # Raises RuntimeError if the path holds a '.' or '..' component.
  def initialize(f)
    @@done ||= []
    return if @@done.include? f
    @@done << f

    raise 'bad path' if (f.split('/') & ['.', '..']).first

    outf = outfilename(f)
    puts "compiling #{outf}..." if $VERBOSE

    # relative path from the output file back to the starting directory
    @pathfix = outf.split('/')[0...-1].map { '../' }.join
    out = compile(File.read(f).gsub("\r", '') + "\n\n")
    File.open(outf, 'wb') { |fd| fd.write out.to_s.gsub("\r", '').gsub("\n", "\r\n") }
  end

  # name of the html file generated for the text file +f+
  def outfilename(f)
    f.sub(/\.txt$/, '') + '.html'
  end

  # Builds the Html::Page for the text +raw+.
  # Handled constructs: titles (a line underlined by 4+ identical
  # characters among = # * -; up to 3 levels, in order of first use),
  # horizontal rules, nested bullet lists (* or -), preformatted text
  # (indented lines) and paragraphs separated by blank lines.
  # Raises RuntimeError on a 4th title underline style.
  def compile(raw)
    prev = ''
    state = {}
    anchors = {}
    out = Html::Page.new
    out.head << Html::Stylesheet.new(@pathfix + 'style.css')
    # ends the current paragraph/list/pre
    flush = lambda {
      out.body << Html::P.new(compile_string(prev)) if prev.length > 0
      [:pre, :list, :par].each { |f| state.delete f }
      prev = ''
    }
    raw.each_line { |l|
      case l = l.chomp
      when /^([=#*-])\1{3,}$/
        if prev.length > 0
          # title: the level is given by the underline character
          if not state[:h1] or state[:h1] == $1
            state[:h1] = $1
            e = 'h1'
          elsif not state[:h2] or state[:h2] == $1
            state[:h2] = $1
            e = 'h2'
          elsif not state[:h3] or state[:h3] == $1
            state[:h3] = $1
            e = 'h3'
          else
            raise "unknown title level after #{prev.inspect}"
          end
          str = compile_string(prev)
          state[:title] ||= str if e == 'h1'
          # anchor: first word of the title, made unique with '_'
          if id = prev[/[a-z]\w+/i]
            id = id.downcase
            id += '_' while anchors[id]
            anchors[id] = true
            attr = { 'id' => id }
          end
          out.body << Html::Elem.new(e, attr).add(str)
          prev = ''
          flush[]
        else
          # horizontal rule
          out.body << Html::Hr.new
          flush[]
        end
      when /^([*-]+)\s+(.*)/
        # list
        bullet = $1
        text = $2
        if lst = state[:list] && state[:list][bullet]
          # back to a known level: forget the deeper ones
          state[:list].delete_if { |k, v| k.length > bullet.length }
        else
          flush[] if not state[:list]
          state[:list] ||= {}
          state[:list].delete_if { |k, v| k.length > bullet.length }
          lst = state[:list][bullet] = Html::List.new
          if pl = state[:list][bullet.chop]
            # nested in the last item of the parent list
            pl.content.last.content << lst
          else
            out.body << lst
          end
        end
        lst.add_line compile_string(text)

      when /^\s+(\S.*)$/
        # preformatted text
        if not pre = state[:pre]
          flush[]
          pre = state[:pre] = Html::Elem.new('pre')
          out.body << pre
        end
        pre.add compile_string(l) + ["\n"]
      when /^\s*$/
        flush[]
      else
        if state[:list]
          # continuation of the last list item; use add so that the
          # pieces are flattened and escaped like any other text
          lst = state[:list].sort.last[1]
          lst.content.last.add(' ', compile_string(l))
        else
          prev << ' ' if prev.length > 0
          prev << l
        end
      end
    }
    flush[]
    out.head << Html::Elem.new('title').add(state[:title]) if state[:title]
    out
  end

  # handle **bold_words** *italic* `fixed` <links>
  # Returns an array of Strings and Html::Elem.
  # In bold and italic, '_' stands for a space. A <link> to an existing
  # .txt file compiles that file too; .jpg/.png become images; other
  # existing files are dropped; anything else is a plain link.
  def compile_string(str)
    o = [str]
    o = split_markup(o, '**') { |s| Html::Elem.new('b').add(s.tr('_', ' ')) }
    o = split_markup(o, '*') { |s| Html::Elem.new('i').add(s.tr('_', ' ')) }
    o = split_markup(o, '`', '`', true) { |s| Html::Span.new('quote').add(s) }
    split_markup(o, '<', '>') { |lnk|
      if File.exist? lnk
        case lnk[/\.(\w+)$/, 1]
        when 'txt'
          tg = outfilename(lnk)
          Txt2Html.new(lnk)
          Html::A.new(@pathfix + tg, File.basename(lnk, '.txt').tr('_', ' '))
        when 'jpg', 'png'
          Html::Img.new(lnk)
        end
      else
        if lnk =~ /\.txt$/
          @@seen_nofile ||= []
          if not @@seen_nofile.include? lnk
            @@seen_nofile << lnk
            puts "reference to missing #{lnk.inspect}"
          end
        end
        Html::A.new(lnk, lnk)
      end
    }
  end

  private

  # In every String of +parts+, replaces each 'open...close' sequence by
  # the value of the block called with the enclosed text (nothing is
  # inserted if the block returns nil). Unless +allow_space+ is set,
  # scanning of a String stops at the first sequence holding a space.
  # Non-String parts are kept untouched. Returns the new array.
  def split_markup(parts, open, close = open, allow_space = false)
    out = []
    parts.each { |s|
      while s.kind_of? String and o1 = s.index(open) and o2 = s.index(close, o1 + open.length) and
            (allow_space or not s[o1..o2].index(' '))
        out << s[0...o1]
        inner = s[o1 + open.length...o2]
        s = s[o2 + close.length..-1]
        elem = yield(inner)
        out << elem if elem
      end
      out << s
    }
    out
  end
end
432
+
433
# Command line entry point: compile the files given as arguments, or by
# default every index.txt found below ../doc (relative to this script).
# -v turns on $VERBOSE, which makes Txt2Html report each compiled file.
if __FILE__ == $0
  verbose_flag = ARGV.delete('-v')
  $VERBOSE = true if verbose_flag
  if ARGV.empty?
    docdir = File.expand_path(File.join(File.dirname(__FILE__), '../doc'))
    Dir.chdir(docdir)
    ARGV.concat(Dir['**/index.txt'])
  end
  ARGV.each { |path| Txt2Html.new(path) }
end