metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,192 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+ #
6
+ # parses the PPC specification PDF to generate the opcode list
7
+ #
8
+
9
+ require 'pdfparse'
10
+
11
# Parser state shared by make_instr, make_alias and epilog.
# field symbol => bit mask of the field, before shifting
$field_mask = {}
# field symbol => left shift of the field inside the 32-bit instruction word
$field_shift = {}
# one [opname, binary value, argument symbols] entry per generated opcode
$opcodes = []

# Converts one instruction description taken from the PDF into opcode entries.
#
# bins:: the encoding row, one token per bitfield: a decimal constant,
#        a field name, or '/', '//', '///' for a reserved field
# bits:: the starting bit index of each field (bit 0 is the most significant)
# text:: the assembly syntax lines, e.g. "fnabs FRT,FRB (Rc=0)"
#
# For every syntax line an [opname, binary, args] entry is appended to
# $opcodes and an "addop" line is printed. Newly seen fields are recorded in
# $field_mask / $field_shift. The strings in +text+ are never modified, so
# frozen strings are accepted.
#
# Raises RuntimeError when a "(name=value)" suffix names an unknown field or
# when an operand cannot be associated to a field.
def make_instr(bins, bits, text)
  # calc bitfields length from their offset: a field extends up to the start
  # of the next one, the last field extends up to bit 32
  last = 32
  bitlen = []
  bits.reverse_each { |bit|
    bitlen.unshift last - bit
    last = bit
  }

  # the opcode binary value (w/o fields)
  bin = 0
  fields = []

  # parse the data
  bins.zip(bits, bitlen).each { |val, off, len|
    off = 32 - (off + len)	# convert the MSB-first index into a shift count
    msk = (1 << len) - 1
    case val
    when '/', '//', '///'
      # reserved field, value unspecified
    when /^\d+$/
      # constant field
      bin |= val.to_i << off
    when /^[A-Za-z]+$/
      fld = val.downcase.to_sym
      # a name already known with another mask/shift gets '_' suffixes until
      # it is either unused or matches this exact layout
      fld = "#{fld}_".to_sym while $field_mask[fld] and ($field_mask[fld] != msk or $field_shift[fld] != off)
      fields << fld
      $field_mask[fld] ||= msk
      $field_shift[fld] ||= off
    end
  }

  # operand written as "x(ra)": the two leftover fields collapse into one argument
  compound_args = [[:d, :ra_i16], [:ds, :ra_i16s], [:dq, :ra_i16q]]

  text.each { |txt|
    # fnabs FRT,FRB (Rc=0)
    curbin = bin
    curfields = fields.dup
    # typo: fdiv. has no '(' ; work on a copy to leave the caller's string alone
    txt = txt.sub(' Rc=1)', ' (Rc=1)') if txt.include? 'fdiv.'
    if txt =~ /(.*\S)\s*\((\w+=.*)\)/
      txt, fixed = $1, $2
      # fold the fixed "name=value" bits into the binary value
      fixed.split.each { |e|
        m = /(\w+)=(\d+)/.match(e)
        raise e if not m
        name, val = m[1].downcase, m[2].to_i
        fld = curfields.find { |fld_| fld_.to_s.delete('_') == name }
        raise "bad bit #{name} in #{txt}" if not fld
        curfields.delete fld
        curbin |= val << $field_shift[fld]
      }
    end

    opname, args = txt.split(/\s+/, 2)
    args = args.to_s.downcase.split(/\s*,\s*/).map { |arg|
      fld = curfields.find { |fld_| fld_.to_s.delete('_') == arg }
      curfields.delete fld
      fld
    }

    if args.include? nil
      compound = compound_args.find { |disp, _| curfields.length == 2 and (curfields - [:ra, disp]).empty? }
      if compound
        args[args.index(nil)] = compound[1]
        curfields.clear
      elsif curfields.length == 1
        # a single unmatched operand takes the single unused field
        args[args.index(nil)] = curfields.shift
      end
    end
    raise "bad args #{args.inspect} (#{curfields.inspect}) in #{txt}" if args.include? nil
    $opcodes << [opname, curbin, args]

    n = (opname.inspect << ',').ljust(10) + '0x%08X' % curbin
    n << ', ' if not args.empty?
    puts "\taddop " + n + args.map { |e| e.inspect }.join(', ')
  }
end
80
+
81
+ # handle instruction aliases
82
+ # NOT WORKING
83
+ # should be implemented in the parser/displayer instead of opcode list
84
+ # manual work needed for e.g. conditional jumps
85
# Derives an alias entry +newop+ from the most recently generated opcode
# named +oldop+ and prints a commented "alias" line describing it.
#
# newop, newargs:: name and operand list of the extended mnemonic
# oldop, oldargs:: name and operand list of the instruction it expands to
#
# Every operand of +oldargs+ that is a decimal or 0x-prefixed hexadecimal
# literal is folded into the binary value at the shift of the field found at
# the same position in the base opcode. Literals with no corresponding field
# (e.g. an operand beyond the base opcode's argument list, or a compound
# argument with no recorded shift) are skipped.
#
# Returns the new [opname, binary, args] entry; the base entry and $opcodes
# are left untouched. Raises RuntimeError if +oldop+ is unknown.
def make_alias(newop, newargs, oldop, oldargs)
  op = $opcodes.reverse.find { |op_| op_[0] == oldop }
  raise "unknown alias #{newop} => #{oldop}" if not op

  op2 = op.dup
  op2[0] = newop
  oldargs.each_with_index { |oa, i|
    val = case oa
          when /^[0-9]+$/ then oa.to_i	# base 10 even with leading zeroes
          when /^0x[0-9a-f]+$/i then oa.hex
          end
    next if not val
    # XXX bcctr 4, 6 -> bcctr 4, 6, 0: the extra operand has no field to go to
    shift = $field_shift[op[2][i]]
    next if not shift
    op2[1] |= val << shift
  }
  puts "#\talias #{newop} #{newargs.join(', ')} -> #{oldop} #{oldargs.join(', ')}".downcase
  op2
end
98
+
99
+ require 'enumerator'
100
# Prints the accumulated $field_shift and $field_mask tables as Ruby hash
# literals assigned to @field_shift / @field_mask, sorted by field name, with
# six entries per output line. Mask values above 1000 are printed in hex.
def epilog
  # Enumerable#enum_slice no longer exists in Ruby >= 1.9, each_slice does
  dump = lambda { |name, table, fmt|
    puts "\n\t@#{name} = {"
    puts table.sort_by { |k, v| k.to_s }.each_slice(6).map { |slc|
      "\t\t" + slc.map { |k, v| "#{k.inspect} => #{fmt[v]}" }.join(', ')
    }.join(",\n")
    puts "\t}"
  }

  dump['field_shift', $field_shift, lambda { |v| v }]
  dump['field_mask', $field_mask, lambda { |v| v > 1000 ? '0x%X' % v : v }]
end
112
+
113
# true once an instruction encoding has been found since the last
# 'Special Registers Altered' marker
$foundop = false

# Scans the text lines of one page column for instruction definitions.
#
# lines:: objects responding to str, x, y, fontx and fonty
#
# Pages holding none of the marker strings are ignored (returns nil).
# Lines sharing the same y are merged (this appends to the +str+ of the first
# one), then the lines are walked from top to bottom:
# * a small-font "0 ... 31" line preceded by an encoding line with the same
#   number of columns (or one less) triggers make_instr with the encoding,
#   the bit indices and the syntax lines found just above
# * 'Special Registers Altered' checks that an encoding was found for the
#   instruction it closes, and raises 'nofoundop' (after dumping the
#   buffered lines) otherwise
# * after an "Extended: Equivalent to:" header, each matching line is sent
#   to make_alias
# A bit index line with no buffered line before it is treated as ordinary
# text instead of failing on nil.
def parse_page(lines)
  # all instr defining pages include this
  return unless lines.find { |l| l.str =~ /Special Registers Altered|Memory Barrier Instructions|Data Cache Instructions/ }	# sync L/dcbt

  ilist = []	# line buffer
  extended = false

  # concat lines with same y
  lines = lines.sort_by { |l| [-l.y, l.x] }
  lastline = nil
  lines.delete_if { |l|
    if lastline and lastline.y == l.y and ([lastline.fontx, lastline.fonty] == [l.fontx, l.fonty] or l.str =~ /^\s*$/)
      lastline.str << ' ' << l.str
      true
    else
      lastline = l
      false
    end
  }

  lines.each { |l|
    ncols = l.str.split.length
    # search for the bit indices list
    if l.fonty < 7 and l.str =~ /^0 [\d ]+ 31\s*$/ and ilist.last and
       [ncols, ncols - 1].include?(ilist.last.str.split.length)
      $foundop = true
      bitindices = l.str.split.map { |i| i.to_i }
      # previous line is the binary encoding
      encoding = ilist.pop.str.split
      bitindices.pop if encoding.length < bitindices.length
      # previous line is the instruction text format
      ilist.pop if ilist.last and ilist.last.str =~ /\[POWER2? mnemonics?: (.*)\]/
      # collect the syntax lines: the closest one unconditionally, then
      # every line above that holds a ',' or ends with ')'
      text = []
      while prevline = ilist.pop
        str = prevline.str
        break if not str or not (str =~ /,|\)$/ or text.empty?)
        text.unshift str
      end
      ilist = []
      make_instr(encoding, bitindices, text)
    elsif l.str.include? 'Special Registers Altered'
      if not $foundop
        puts ilist.map { |l_| "(#{l_.y}) #{l_.str}" }
        puts lines.map { |l_| "(#{l_.y}) #{l_.str}" } if ilist.empty?
        raise 'nofoundop'
      else
        $foundop = false
      end
    elsif l.str =~ /Extended:\s+Equivalent to:/
      extended = true
    elsif extended
      m = (l.str.include?(',') and /^(\S+)\s+(\S+)\s+(\S+)\s+(.*)/.match(l.str))
      if m and $opcodes.find { |op| op[0] == m[3] }
        make_alias(m[1], m[2].split(','), m[3], m[4].split(','))
      else
        extended = false
      end
    else
      ilist << l
    end
  }
end
168
+
169
+ # PowerPC Architecture v2.02:
170
+ # 1 - User Instruction Set
171
+ # 2 - Virtual Environment
172
+ # 3 - Operating Environment
173
# Walk every book of the specification found in the current directory, in
# name order, and feed both text columns of each page to parse_page.
# Progress is reported on stderr only when it is a terminal.
Dir['PPC_Vers202_Book?_public.pdf'].sort.each do |book|
  $stderr.puts book if $stderr.tty?
  pdf = PDF.read book
  pagecount = pdf.trailer['Root']['Pages']['Count'] || 0
  curpage = 0
  pdf.each_page do |page|
    curpage += 1
    $stderr.print "#{curpage}/#{pagecount} \r" if $stderr.tty?
    page.clip_lines(50, 740)

    # split columns: lines starting left of x=288 belong to the first one
    left, right = page.lines.flatten.partition { |l| l.x < 288 }

    [left, right].each { |column| parse_page(column) }
  end
  $stderr.print " \r" if $stderr.tty?
end

# dump the field tables gathered while parsing
epilog
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/ruby
2
+ # This file is part of Metasm, the Ruby assembly manipulation suite
3
+ # Copyright (C) 2006-2009 Yoann GUILLOT
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+
8
+ # this is a TCP proxy which dumps the transmitted data in hex on stdout
9
+ # usage: tcp_proxy <lhost> <lport> <rhost> <rport> [<timeout_s>]
10
+
11
+ require 'socket'
12
+ require File.join(File.dirname(__FILE__), 'hexdump')
13
+
14
# Relays data between two connected sockets until one side closes or no
# socket becomes readable within +timeout+ seconds (nil waits forever).
#
# clt, srv:: the two IO endpoints
#
# Each direction is buffered up to 16 KiB. Whenever buffered data can be
# written to its destination, a timestamped direction header is printed on
# stdout and the data is passed to String#hexdump before being forwarded.
# A read error or end of file marks that endpoint as closed, which ends the
# relay after the current round.
def bouncepkt(clt, srv, timeout=nil)
  bufmax = 1024*16
  s2c = String.new
  c2s = String.new

  # moves what +sock+ has available into +buf+ (bounded by bufmax) with one
  # read per ready event instead of one per byte; returns the socket, or
  # false once it is closed or failing
  drain = lambda { |sock, buf|
    while sock and buf.length < bufmax and IO.select([sock], nil, nil, 0)
      str = begin
        sock.readpartial(bufmax - buf.length)
      rescue StandardError
        nil
      end
      if not str or str.empty?
        sock = false
      else
        buf << str
      end
    end
    sock
  }

  loop do
    break if not IO.select([clt, srv], nil, nil, timeout)

    srv = drain[srv, s2c]
    clt = drain[clt, c2s]

    if clt and s2c.length > 0 and IO.select(nil, [clt], nil, 0)
      puts Time.now.strftime('s -> c %H:%M:%S')
      s2c.hexdump(:fmt => ['c', 'a'])
      clt.write s2c
      s2c.clear
    end

    if srv and c2s.length > 0 and IO.select(nil, [srv], nil, 0)
      puts Time.now.strftime('c -> s %H:%M:%S')
      c2s.hexdump(:fmt => ['c', 'a'])
      srv.write c2s
      c2s.clear
    end
    break if not clt or not srv
  end
end
54
+
55
+
56
# Command line entry point: listen on lhost:lport and relay each incoming
# connection, one at a time, to rhost:rport through bouncepkt.
if $0 == __FILE__
  if ARGV.length < 4
    abort "usage: bnc <lhost> <lport> <rhost> <rport> [<timeout_s>]"
  end

  lhost = ARGV.shift
  lport = Integer(ARGV.shift)
  rhost = ARGV.shift
  rport = Integer(ARGV.shift)
  timeout = Float(ARGV.shift) if not ARGV.empty?

  s = TCPServer.new(lhost, lport)

  loop do
    puts "waiting..."
    a = s.accept
    puts "incoming connection"
    c = nil

    begin
      # connect inside the rescue: an unreachable remote must only drop this
      # client, not stop the proxy
      c = TCPSocket.new(rhost, rport)
      bouncepkt(a, c, timeout)
    rescue SystemCallError => e
      $stderr.puts "connection error: #{e.message}"
    ensure
      # release both sockets whatever happened above
      puts "connection closed"
      a.close
      c.close if c
    end
  end
end
@@ -0,0 +1,440 @@
1
+ #!/usr/bin/env ruby
2
+ # This file is part of Metasm, the Ruby assembly manipulation suite
3
+ # Copyright (C) 2006-2009 Yoann GUILLOT
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+ # This script is used to compile the Metasm documentation into HTML files
8
+ # Loosely inspired by the reST syntax
9
+
10
+ # stuff to generate html code
11
# stuff to generate html code
module Html
  # A generic markup element: a tag name, attributes, style properties and a
  # list of children (escaped Strings or other Elem).
  class Elem
    attr_reader :name, :attrs, :content, :style

    IndentAdd = ' '
    LineLenMax = 80

    # characters replaced by an entity when text is added to an element
    QuoteChars = {
      'è' => '&egrave;',
      'ë' => '&euml;',
      '>' => '&gt;',
      '<' => '&lt;',
      '"' => '&quot;',
      '&' => '&amp;',
    }.freeze
    # built once instead of on every call to add
    QuoteRe = Regexp.union(QuoteChars.keys)

    # name::    tag name
    # attrs::   optional Hash of attributes; a 'style' entry is split into
    #           individual style properties
    # content:: false makes a self-closing tag (<name />); an Array is used
    #           as the children list as-is (no escaping); nil starts empty
    def initialize(name, attrs=nil, content=nil)
      @name = name
      @attrs = Hash.new
      @style = Hash.new
      attrs.each { |k, v| set_attr(k, v) } if attrs
      if content == false
        @content = Array.new
        @uniq = true
      else
        @content = content ? content : Array.new
        @uniq = false
      end
      self
    end

    # Appends children. Arrays are flattened recursively, Elem are stored
    # as-is, anything else is converted to a String with the characters of
    # QuoteChars replaced by their entity. Returns self.
    def add(*content)
      content.each { |e|
        case e
        when Array
          add(*e)
        when Elem
          @content << e
        else
          @content << e.to_s.gsub(QuoteRe) { |x| QuoteChars[x] }
        end
      }
      self
    end
    alias << add

    # Sets one style property. Returns self.
    def add_style(k, v)
      @style[k] = v
      self
    end

    # Sets one attribute; the value of 'style' is parsed as a
    # "prop: value; prop: value" list. Returns self.
    def set_attr(k, v)
      if k == 'style'
        v.split(/\s*;\s*/).each { |s|
          add_style($1, $2) if s =~ /^\s*(\S+)\s*:\s*(.*?)\s*$/
        }
      else
        @attrs[k] = v
      end
      self
    end

    # Sets the 'background' style property. Returns self.
    def bg(c)
      @style['background'] = c
      self
    end

    # Sets the 'class' attribute. Returns self.
    def hclass(c)
      @attrs['class'] = c
      self
    end

    # Text length of the element rendered on one line w/o indent.
    # start:: the already rendered opening tag, if available
    def length(start=nil)
      if start
        l = start.length
      else
        # '<name>'
        l = @name.length + 2
        @attrs.each { |k, v| l += " #{k}=\"#{v}\"".length }
        # ' style=""' - last '; '
        l += 9-2 unless @style.empty?
        # 'k: v; '
        @style.each { |k, v| l += "#{k}: #{v}; ".length }
        # ' /'
        l += 2 if @uniq
      end
      @content.each { |c| l += c.length }
      # '</name>'
      l += 3+@name.length unless @uniq
      l
    end

    # Renders the element. Elements fitting in LineLenMax columns are
    # rendered on a single line, 'pre' elements are rendered verbatim, and
    # everything else is spread over several lines with the children indented
    # by IndentAdd and text wrapped at LineLenMax.
    def to_s(indent = '')
      attrs = @attrs.map { |k, v| " #{k}=\"#{v}\"" }.join
      attrs += ' style="' + @style.map { |k, v| "#{k}: #{v}" }.join('; ') + '"' unless @style.empty?
      s = ''.dup << indent << '<' << @name << attrs << (@uniq ? ' />' : '>')
      return s if @uniq
      return s << @content.map { |c| c.to_s }.join.chomp << '</pre>' if @name == 'pre'
      return s << @content.map { |c| c.to_s }.join << "</#{@name}>" if length(s) <= LineLenMax

      sindent = indent + IndentAdd
      # separator to emit before the next child; '' right after a text
      # chunk that does not end with whitespace (the next child is glued)
      sep = "\n"
      @content.each { |c|
        case c
        when Elem
          if sep == ''
            s << c.to_s(sindent).sub(/^\s+/, '')
          else
            news = c.to_s(sindent)
            # current column
            plen = s.length - (s.rindex("\n") || -1) - 1
            plen -= 1 if s[-1, 1] == ' '
            newss = news.sub(/^\s+/, '')
            if not news.include?("\n") and s[-1, 1] != '>' and
               plen + 1 + newss.length <= LineLenMax
              # concat inline tag to previous String
              s << ' ' if s[-1, 1] != ' '
              s << newss
            else
              # blank line before a title, unless it comes first
              s << sep if c.name =~ /^h\d$/ and c != @content.first
              s << sep << news
            end
          end
        when String
          cw = c.split(/\s+/)
          if @name == 'p' and c.object_id == @content.first.object_id
            # the text of a paragraph starts on its own line
            cw.shift if cw[0] == ''
            s << "\n" << sindent
          else
            s << cw.shift.to_s
          end
          plen = s.length - (s.rindex("\n") || -1) - 1
          while w = cw.shift
            plen -= 1 if s[-1, 1] == ' '
            if plen + 1 + w.length > LineLenMax
              s << "\n" << sindent
              plen = sindent.length
            end
            s << ' ' if s[-1, 1] != ' '
            s << w
            plen += w.length+1
          end
          if c !~ /\s+$/
            sep = ''
            next
          end
        else
          s << sep << sindent << c.to_s
        end
        sep = "\n"
      }
      sep = "\n" if @name == 'p'
      sep += indent if sep != ''
      s << sep << "</#{@name}>"
    end

    def inspect
      "<#{@name}" + @content.map { |c| "\n" + c.inspect }.join + "\n/#{@name}>"
    end
  end

  # A whole XHTML document with its head and body elements.
  class Page < Elem
    attr_reader :body, :head

    def initialize
      @body = Elem.new('body')
      @head = Elem.new('head')
      super('html', {'xmlns'=>'http://www.w3.org/1999/xhtml', 'xml:lang'=>'fr'})
      add(@head)
      add(@body)
    end

    # The rendered html element preceded by the XML declaration and doctype.
    def to_s
      '<?xml version="1.0" encoding="us-ascii" ?>'+"\n"+
      '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"'+"\n"+
      IndentAdd*2+'"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'+"\n"+
      super.to_s
    end
  end

  class Img < Elem
    def initialize(src, alt=nil)
      super('img', {'src'=>src}, false)
      set_attr('alt', alt) if alt
      self
    end
  end

  class A < Elem
    # text goes through add so that it is escaped like any other text
    def initialize(href, text)
      super('a', {'href'=>href})
      add(text)
    end
  end

  class P < Elem
    def initialize(content = nil)
      super('p')
      add(content) if content
      self
    end
  end

  class Div < Elem
    def initialize(hclass = nil)
      super('div')
      hclass(hclass) if hclass
      self
    end
  end

  class Span < Elem
    def initialize(hclass = nil)
      super('span')
      hclass(hclass) if hclass
      self
    end
  end

  class Stylesheet < Elem
    def initialize(href)
      super('link', {'rel'=>'stylesheet', 'type'=>'text/css', 'href'=>href}, false)
    end
  end

  class Br < Elem
    def initialize
      super('br', nil, false)
    end
  end

  class Hr < Elem
    def initialize
      super('hr', nil, false)
    end
  end

  # An unordered list; every line is wrapped in its own li element.
  class List < Elem
    def initialize(*elems)
      super('ul')
      elems.each { |e| add_line(e) }
    end

    def add_line(line)
      add(Elem.new('li').add(line))
      self
    end
  end
end
263
+
264
# Compiles one text file into an html file stored next to it (foo.txt ->
# foo.html). Text files referenced through <link.txt> are compiled too;
# every file is processed at most once per run.
class Txt2Html
  # f:: path of the text file, relative to the current directory; '.' and
  #     '..' path components are rejected
  def initialize(f)
    @@done ||= []
    return if @@done.include? f
    @@done << f

    raise 'bad path' if (f.split('/') & ['.', '..']).first

    outf = outfilename(f)
    puts "compiling #{outf}..." if $VERBOSE

    # relative path from the output file back to the starting directory
    @pathfix = outf.split('/')[0...-1].map { '../' }.join
    out = compile(File.read(f).gsub("\r", '') + "\n\n")
    File.open(outf, 'wb') { |fd| fd.write out.to_s.gsub("\r", '').gsub("\n", "\r\n") }
  end

  # name of the html file generated for the text file +f+
  def outfilename(f)
    f.sub(/\.txt$/, '') + '.html'
  end

  # Converts the whole text +raw+ into an Html::Page.
  # Recognized line types:
  # * a line made of 4 or more identical = # * - chars: underlines the
  #   previous line as a title (level h1..h3 in order of first appearance
  #   of the char), or is an horizontal rule if there is no pending text
  # * "* text" / "- text": list item, nested through longer bullets
  # * indented line: preformatted text
  # * blank line: ends the current paragraph/list/pre block
  # * anything else: paragraph text, or continuation of the last list item
  def compile(raw)
    prev = ''
    state = {}
    anchors = {}
    out = Html::Page.new
    out.head << Html::Stylesheet.new(@pathfix + 'style.css')
    # emit the pending paragraph and close the current block
    flush = lambda {
      out.body << Html::P.new(compile_string(prev)) if prev.length > 0
      [:pre, :list, :par].each { |f| state.delete f }
      prev = ''
    }
    raw.each_line { |l|
      case l = l.chomp
      when /^([=#*-])\1{3,}$/
        mark = $1
        if prev.length > 0
          # title: a level is bound to the first underline char seen for it
          level = [:h1, :h2, :h3].find { |h| not state[h] or state[h] == mark }
          raise "unknown title level after #{prev.inspect}" if not level
          state[level] = mark
          e = level.to_s
          str = compile_string(prev)
          state[:title] ||= str if e == 'h1'
          attr = nil
          if id = prev[/[a-z]\w+/i]
            # anchor name: first word of the title, made unique
            id = id.downcase
            id += '_' while anchors[id]
            anchors[id] = true
            attr = { 'id' => id }
          end
          out.body << Html::Elem.new(e, attr).add(str)
          prev = ''
          flush[]
        else
          # horizontal rule
          out.body << Html::Hr.new
          flush[]
        end
      when /^([*-]+)\s+(.*)/
        # list
        bullet, text = $1, $2
        if lst = state[:list] && state[:list][bullet]
          # back to a shallower level: forget the deeper lists
          state[:list].delete_if { |k, v| k.length > bullet.length }
        else
          flush[] if not state[:list]
          state[:list] ||= {}
          state[:list].delete_if { |k, v| k.length > bullet.length }
          lst = state[:list][bullet] = Html::List.new
          if pl = state[:list][bullet.chop]
            # nested list: goes inside the last item of the parent list
            pl.content.last.add(lst)
          else
            out.body << lst
          end
        end
        lst.add_line compile_string(text)
      when /^\s+(\S.*)$/
        # preformatted text
        if not pre = state[:pre]
          flush[]
          pre = state[:pre] = Html::Elem.new('pre')
          out.body << pre
        end
        pre.add compile_string(l) + ["\n"]
      when /^\s*$/
        flush[]
      else
        if state[:list]
          # continuation of the last list item; use add so that the pieces
          # are flattened and escaped instead of storing a raw Array
          lst = state[:list].sort.last[1]
          lst.content.last.add(' ', compile_string(l))
        else
          prev = prev.length > 0 ? "#{prev} #{l}" : l
        end
      end
    }
    flush[]
    out.head << Html::Elem.new('title').add(state[:title]) if state[:title]
    out
  end

  # handle **bold_words** *italic* `fixed` <links>
  # Returns an Array mixing the remaining Strings and the generated elements.
  # Bold, italic and links must not contain a space ('_' stands for a space
  # in bold and italic text).
  def compile_string(str)
    parts = [str]
    parts = split_markup(parts, '**', '**') { |t| Html::Elem.new('b').add(t.tr('_', ' ')) }
    parts = split_markup(parts, '*', '*') { |t| Html::Elem.new('i').add(t.tr('_', ' ')) }
    parts = split_markup(parts, '`', '`', true) { |t| Html::Span.new('quote').add(t) }
    split_markup(parts, '<', '>') { |lnk| compile_link(lnk) }
  end

  private

  # Replaces, in every String of +parts+, each text enclosed between +open+
  # and +close+ by the value the block returns for it. Non-String entries
  # are kept untouched. Unless +spaces_ok+, scanning of a String stops at
  # the first enclosed text holding a space.
  def split_markup(parts, open, close, spaces_ok = false)
    out = []
    parts.each { |s|
      while s.kind_of? String and o1 = s.index(open) and o2 = s.index(close, o1 + open.length) and
            (spaces_ok or not s[o1..o2].index(' '))
        out << s[0...o1] << yield(s[o1 + open.length...o2])
        s = s[o2 + close.length..-1]
      end
      out << s
    }
    out
  end

  # Element for a <lnk> reference: existing .txt files are compiled and
  # linked to their html version, existing jpg/png files are included as
  # images, anything else becomes a plain link to +lnk+ (with a warning,
  # once, for a missing .txt file).
  def compile_link(lnk)
    if File.exist? lnk
      case lnk[/\.(\w+)$/, 1]
      when 'txt'
        tg = outfilename(lnk)
        Txt2Html.new(lnk)
        Html::A.new(@pathfix + tg, File.basename(lnk, '.txt').tr('_', ' '))
      when 'jpg', 'png'
        Html::Img.new(lnk)
      else
        # other kinds of files used to vanish from the output
        Html::A.new(lnk, lnk)
      end
    else
      if lnk =~ /\.txt$/
        @@seen_nofile ||= []
        if not @@seen_nofile.include? lnk
          @@seen_nofile << lnk
          puts "reference to missing #{lnk.inspect}"
        end
      end
      Html::A.new(lnk, lnk)
    end
  end
end
432
+
433
# Command line entry point: compile the files given as arguments, or every
# index.txt found under ../doc (relative to this script) when none is given.
# The -v switch enables the progress messages.
if __FILE__ == $0
  verbose = ARGV.delete('-v')
  $VERBOSE = true if verbose

  if ARGV.empty?
    docdir = File.expand_path(File.join(File.dirname(__FILE__), '../doc'))
    Dir.chdir(docdir)
    ARGV.concat(Dir['**/index.txt'])
  end

  ARGV.each do |path|
    Txt2Html.new(path)
  end
end