metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,228 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/parse'
9
+ require 'metasm/encode'
10
+ require 'metasm/decode'
11
+ require 'metasm/exe_format/serialstruct'
12
+ require 'metasm/os/main' # VirtualFile
13
+
14
+ module Metasm
15
+ class ExeFormat
16
# builds a new instance (extra arguments and block are forwarded to +new+)
# and fills its +encoded+ from +str+:
# an EncodedData replaces +encoded+ outright, anything else is appended to it
def self.load(str, *a, &b)
  exe = new(*a, &b)
  case str
  when EncodedData then exe.encoded = str
  else exe.encoded << str
  end
  exe
end
24
+
25
# entry point used by AutoExe, simply forwards everything to +load+
def self.autoexe_load(*args, &blk)
  load(*args, &blk)
end
29
+
30
+ attr_accessor :filename
31
+
32
# same as +load+, but the content is read from the file +path+ through VirtualFile
# records +path+ as the filename of the new object unless one is already set
def self.load_file(path, *a, &b)
  raw = VirtualFile.read(path)
  exe = load(raw, *a, &b)
  exe.filename = path unless exe.filename
  exe
end
39
+
40
# +load_file+ then decode
# the decode step is skipped when the loaded object already has a @disassembler instance variable
def self.decode_file(path, *a, &b)
  exe = load_file(path, *a, &b)
  has_dasm = exe.instance_variables.any? { |iv| iv.to_s == "@disassembler" }
  exe.decode unless has_dasm
  exe
end
46
+
47
# +load_file+, then decodes only the header
# returns the loaded object
def self.decode_file_header(path, *a, &b)
  exe = load_file(path, *a, &b)
  exe.decode_header
  exe
end
53
+
54
# +load+ from the raw content, then fully decode
# returns the loaded object
def self.decode(raw, *a, &b)
  exe = load(raw, *a, &b)
  exe.decode
  exe
end
59
+
60
# +load+ from the raw content, then decode only the header
# returns the loaded object
def self.decode_header(raw, *a, &b)
  exe = load(raw, *a, &b)
  exe.decode_header
  exe
end
65
+
66
# creates a new object for the given cpu, then parses and assembles the asm +source+
# the cpu and the source may be passed in either order
def self.assemble(cpu, source, file='<unk>', lineno=1)
  cpu, source = source, cpu if source.kind_of?(CPU)
  exe = new(cpu)
  exe.assemble(source, file, lineno)
  exe
end
73
+
74
# same as #assemble, with the asm source read from the file +filename+
# the cpu and the filename may be passed in either order
def self.assemble_file(cpu, filename)
  cpu, filename = filename, cpu if filename.kind_of?(CPU)
  asm = File.read(filename)
  assemble(cpu, asm, filename, 1)
end
79
+
80
# parses standalone C code, compiles it to asm and assembles the result
# the asm listing is printed when $DEBUG is set
def compile_c(source, file='<unk>', lineno=1)
  parser = @cpu.new_cparser
  tune_cparser(parser)
  parser.parse(source, file, lineno)
  read_c_attrs(parser) if respond_to?(:read_c_attrs)
  asm = @cpu.new_ccompiler(parser, self).compile
  puts asm if $DEBUG
  assemble(asm, 'C compiler output', 1)
  c_set_default_entrypoint
end
91
+
92
# creates a new object for the given cpu, then parses/compiles/assembles the C +source+
# the cpu and the source may be passed in either order
def self.compile_c(cpu, source, file='<unk>', lineno=1)
  cpu, source = source, cpu if source.kind_of?(CPU)
  exe = new(cpu)
  exe.compile_c(source, file, lineno)
  exe
end
99
+
100
# same as compile_c, with the C source read from the file +filename+
# the cpu and the filename may be passed in either order
def self.compile_c_file(cpu, filename)
  cpu, filename = filename, cpu if filename.kind_of?(CPU)
  c_src = File.read(filename)
  compile_c(cpu, c_src, filename, 1)
end
104
+
105
# appends +section+, the directive changing the current assembler section,
# to the assembler source +src+
def compile_setsection(src, section)
  src << section
end
109
+
110
# hook: prepares the preprocessor +l+ before it reads any source
# should define the macros identifying the file format, does nothing here
def tune_prepro(l)
  nil
end
113
+
114
# prepares the cparser +cp+: runs tune_prepro on its lexer
def tune_cparser(cp)
  lexer = cp.lexer
  tune_prepro(lexer)
end
118
+
119
# hook called once the C code is parsed, to handle C attributes like export/import/init etc
# does nothing here
def read_c_attrs(cp)
  nil
end
122
+
123
# hook: should set up a default entrypoint for C code, including preparing the args for main() etc
# does nothing here
def c_set_default_entrypoint
  nil
end
126
+
127
+ attr_writer :disassembler # custom reader
128
# returns @disassembler, building it through init_disassembler on first use
def disassembler
  @disassembler = init_disassembler if not @disassembler
  @disassembler
end
131
+
132
# returns the exe disassembler
# creates it if it does not exist yet, gives it the exe cpu if it has none,
# and feeds it with every section of the exe (a missing section content is
# replaced by an empty EncodedData)
def init_disassembler
  @disassembler ||= Disassembler.new(self)
  @disassembler.cpu ||= cpu
  each_section do |edata, base|
    @disassembler.add_section(edata || EncodedData.new, base)
  end
  @disassembler
end
143
+
144
# disassembles from the specified entrypoints
# falls back to get_default_entrypoints when none is given
# the disassembler is initialized if needed, and returned
def disassemble(*entrypoints)
  targets = entrypoints.empty? ? get_default_entrypoints : entrypoints
  disassembler.disassemble(*targets)
  @disassembler
end
153
+
154
# disassembles from the specified entrypoints without backtracking
# falls back to get_default_entrypoints when none is given
# the disassembler is initialized if needed, and returned
def disassemble_fast_deep(*entrypoints)
  targets = entrypoints.empty? ? get_default_entrypoints : entrypoints
  disassembler.disassemble_fast_deep(*targets)
  @disassembler
end
163
+
164
# returns the list of entrypoints to disassemble (program entrypoint, exported functions...)
# empty here
def get_default_entrypoints
  Array.new
end
168
+
169
# encodes the executable and returns the raw string version
# raises if some relocations are still unresolved: the message lists the
# target of each one, followed by its backtrace when there is one
def encode_string(*a)
  encode(*a)
  unless @encoded.reloc.empty?
    lines = @encoded.reloc.map { |_off, rel|
      target = "#{rel.target} "
      target + (rel.backtrace ? Backtrace.backtrace_str(rel.backtrace) : nil).to_s
    }
    raise ["Unresolved relocations:", lines].join("\n")
  end
  @encoded.data
end
176
+
177
# runs +encode_string+ and saves the encoded data in the file +path+ (binary mode)
# the existence check below is disabled, so +path+ is opened for writing even if it already exists
def encode_file(path, *a)
  #raise Errno::EEXIST, path if File.exist? path # race, but cannot use O_EXCL, as O_BINARY is not defined in ruby
  encode_string(*a)
  File.open(path, 'wb') do |fd|
    fd.write(@encoded.data)
  end
end
184
+
185
# returns the file offset where the byte mapped at +addr+ comes from
# identity here
def addr_to_fileoff(addr)
  addr
end
189
+
190
# returns the address at which the file offset +foff+ would be mapped
# identity here
def fileoff_to_addr(foff)
  foff
end
194
+
195
# returns the downcased class name, without its namespace
def shortname
  self.class.name.split('::').last.downcase
end
196
+
197
# conversions between numeric constants and their symbolic names
# every method takes the hash mapping the numeric values to the names
# the reverse lookups use Hash#key: Hash#index does not exist anymore in ruby 3.0+
module IntToHash
  # converts a constant name to its numeric value using the hash
  # an Integer matching no name is returned as is, any other unknown value raises
  # {1 => 'toto', 2 => 'tata'}: 'toto' => 1, 42 => 42, 'tutu' => raise
  def int_from_hash(val, hash)
    found = hash.key(val)
    return found if found
    return val if val.kind_of?(Integer)
    raise "unknown constant #{val.inspect}"
  end

  # converts an array of flag constants to its numeric value using the hash
  # a value that is not an Array is handled like in int_from_hash
  # {1 => 'toto', 2 => 'tata'}: ['toto', 'tata'] => 3, 'toto' => 1, 42 => 42
  def bits_from_hash(val, hash)
    return int_from_hash(val, hash) if not val.kind_of?(Array)
    val.inject(0) { |bits, bitname| bits | int_from_hash(bitname, hash) }
  end

  # converts a numeric value to the corresponding constant name using the hash
  # an Integer without a name is returned as is, a non-Integer must be one of the known names
  # {1 => 'toto', 2 => 'tata'}: 1 => 'toto', 42 => 42, 'tata' => 'tata', 'tutu' => raise
  def int_to_hash(val, hash)
    return hash.fetch(val, val) if val.kind_of?(Integer)
    raise "unknown constant #{val.inspect}" if not hash.key(val)
    val
  end

  # converts a numeric value to the corresponding array of constant flag names using the hash
  # the bits matching no flag are appended as one Integer (omitted when 0)
  # an Array or a single name is converted element-wise with int_to_hash
  # {1 => 'toto', 2 => 'tata'}: 5 => ['toto', 4]
  def bits_to_hash(val, hash)
    names = case val
    when Integer
      # each matching flag is cleared from val as it is collected
      flags = hash.find_all { |k, v| val & k == k and val &= ~k }.map { |k, v| v }
      flags << val
    when Array
      val.map { |e| int_to_hash(e, hash) }
    else
      [int_to_hash(val, hash)]
    end
    names - [0]
  end
end
222
+ include IntToHash
223
+ end
224
+
225
# gives SerialStruct the constant name <-> numeric value helpers of ExeFormat::IntToHash
class SerialStruct
  include ExeFormat::IntToHash
end
228
+ end
@@ -0,0 +1,164 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+ require 'metasm/encode'
9
+ require 'metasm/decode'
10
+
11
+ module Metasm
12
+ class MZ < ExeFormat
13
+ MAGIC = 'MZ' # 0x4d5a
14
# the MZ file header
class Header < SerialStruct
  mem :magic, 2, MAGIC
  words :cblp, :cp, :crlc, :cparhdr, :minalloc, :maxalloc, :ss, :sp, :csum, :ip, :cs, :lfarlc, :ovno
  mem :unk, 4

  # encodes the header of +mz+
  # +relocs+ is the encoded relocation table, used to compute the unset fields
  def encode(mz, relocs)
    h = EncodedData.new
    set_default_values mz, h, relocs
    h << super(mz)
  end

  # sets the fields that have no value yet
  # does nothing when +h+, the EncodedData that will hold the header, is not given
  # the fields depending on the final layout are Expressions over labels on +h+, +relocs+ and the mz body
  def set_default_values(mz, h=nil, relocs=nil)
    return if not h
    @cblp ||= Expression[[mz.label_at(mz.body, mz.body.virtsize), :-, mz.label_at(h, 0)], :%, 512] # number of bytes used in last page
    @cp ||= Expression[[mz.label_at(mz.body, mz.body.virtsize), :-, mz.label_at(h, 0)], :/, 512] # number of pages used
    @crlc ||= relocs.virtsize/4
    @cparhdr ||= Expression[[mz.label_at(relocs, 0), :-, mz.label_at(h, 0)], :/, 16] # header size in paragraphs (16o)
    @minalloc ||= ((mz.body.virtsize - mz.body.rawsize) + 15) / 16
    @maxalloc ||= @minalloc
    @sp ||= 0 # ss:sp points at 1st byte of body => works if body does not reach end of segment (or maybe the overflow make the stack go to header space)
    @lfarlc ||= Expression[mz.label_at(relocs, 0), :-, mz.label_at(h, 0)]

    super(mz)
  end

  # decodes the header from +mz+
  # raises InvalidExeFormat if the magic is not MAGIC
  def decode(mz)
    super(mz)
    # report the magic actually read (the message used to reference an undefined local)
    raise InvalidExeFormat, "Invalid MZ signature #{@magic.inspect}" if @magic != MAGIC
  end
end
44
+
45
# one entry of the relocation table: two words, offset then segment
class Relocation < SerialStruct
  words :offset, :segment
end
48
+
49
+
50
# encodes +val+ as a 16 bits word, using the file endianness
def encode_word(val)
  Expression[val].encode(:u16, @endianness)
end

# decodes a 16 bits word from +edata+ (self.encoded by default), using the file endianness
def decode_word(edata = @encoded)
  edata.decode_imm(:u16, @endianness)
end
54
+
55
+
56
+ attr_accessor :endianness, :header, :source
57
+ # the EncodedData representing the content of the file
58
+ attr_accessor :body
59
+ # an array of Relocations - quite obscure
60
+ attr_accessor :relocs
61
+
62
# creates an empty MZ: blank header, empty body, no relocation, no source
# the endianness is the one of +cpu+ if given, little endian otherwise
def initialize(cpu=nil)
  if cpu
    @endianness = cpu.endianness
  else
    @endianness = :little
  end
  @relocs = []
  @header = Header.new
  @body = EncodedData.new
  @source = []
  super(cpu)
end
70
+
71
# parses the arguments if any, then assembles the pending source
# at the end of the body, resolves the body labels and clears the source
def assemble(*a)
  parse(*a) unless a.empty?
  assembled = assemble_sequence(@source, @cpu)
  @body << assembled
  @body.fixup(@body.binding)
  # XXX should create @relocs here
  @source.clear
end
79
+
80
# makes @cursource point to @source before handing over to the parent parse_init
def parse_init
  @cursource = @source
  super()
end
85
+
86
# encodes the relocation table, then the header
# returns them in an array, with the body: [header, relocs, body]
def pre_encode
  reloc_table = EncodedData.new
  @relocs.each { |rel| reloc_table = reloc_table << rel.encode(self) }
  encoded_hdr = @header.encode(self, reloc_table)
  [encoded_hdr, reloc_table, @body]
end
92
+
93
# handles the exe-specific parser instructions:
# .entrypoint [<label>]: defines the program entrypoint to label (or create a new label at this location)
# any other instruction is ignored
# NOTE(review): other instructions are not forwarded to super - confirm this is intended
def parse_parser_instruction(instr)
  return if instr.raw.downcase != '.entrypoint'

  @lexer.skip_space
  tok = @lexer.nexttok
  if tok and tok.type == :string
    # ".entrypoint <somelabel/expression>"
    entrypoint = Expression.parse(@lexer)
    raise instr, 'syntax error' if not entrypoint
  else
    # ".entrypoint" alone: label the current location
    entrypoint = new_label('entrypoint')
    @cursource << Label.new(entrypoint, instr.backtrace.dup)
  end
  # header.ip is the offset of the entrypoint from the start of the body
  @header.ip = Expression[entrypoint, :-, label_at(@body, 0, 'body')]
  @lexer.skip_space
  trailing = @lexer.nexttok
  raise instr, 'eol expected' if trailing and trailing.type != :eol
end
111
+
112
+
113
# appends the header, the relocation table and the body to self.encoded,
# resolves the labels and fixes the checksum
def encode
  out = @encoded
  pre_encode.each { |part| out = out << part }
  @encoded.fixup(@encoded.binding)
  encode_fix_checksum
end
119
+
120
# sets the file checksum (untested)
# the header is re-read from self.encoded, the 16 bits words of the file are
# summed, and the header is rewritten in place with the new checksum
def encode_fix_checksum
  @encoded.ptr = 0
  decode_header
  mzlen = @header.cp * 512 + @header.cblp
  @encoded.ptr = 0
  # start from minus the current checksum field value
  csum = -@header.csum
  word_count = mzlen/2
  word_count.times { csum += decode_word }
  @header.csum = csum & 0xffff
  hdr = @header.encode(self, nil)
  @encoded[0, hdr.length] = hdr
end
133
+
134
# fills @header by decoding the MZ header at the current offset of self.encoded
def decode_header
  @header.decode(self)
end
138
+
139
# decodes the relocation table: header.crlc entries found at the file offset header.lfarlc
# the previous content of @relocs is discarded
def decode_relocs
  @relocs.clear
  @encoded.ptr = @header.lfarlc
  count = @header.crlc
  count.times { @relocs.push Relocation.decode(self) }
end
145
+
146
# decodes the main part of the program
# the body is the part of the file between the end of the header and the file size
# computed from the header, extended by header.minalloc paragraphs of virtual space
# defines the 'start' export, pointing to the MZ entrypoint
def decode_body
  body_start = @header.cparhdr * 16
  body_end = @header.cp * 512 + @header.cblp
  @body = @encoded[body_start...body_end]
  @body.virtsize += @header.minalloc * 16
  entry = @header.cs * 16 + @header.ip
  @body.add_export('start', entry)
end
153
+
154
# decodes the whole file: header, relocation table, then body
def decode
  decode_header
  decode_relocs
  decode_body
end
159
+
160
# yields the only section of the file: the body, with base address 0
def each_section
  base = 0
  yield @body, base
end
163
+ end
164
+ end
@@ -0,0 +1,172 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+ require 'metasm/encode'
9
+ require 'metasm/decode'
10
+
11
+
12
+ module Metasm
13
+ # Nintendo DS executable file format
14
+ class NDS < ExeFormat
15
+ class Header < SerialStruct
16
+ str :title, 12
17
+ str :code, 4
18
+ str :maker, 2
19
+ bytes :unitcode, :encryptionselect, :devicetype
20
+ mem :reserved1, 9
21
+ bytes :version, :autostart
22
+ words :arm9off, :arm9entry, :arm9addr, :arm9sz
23
+ words :arm7off, :arm7entry, :arm7addr, :arm7sz
24
+ words :fnameoff, :fnamesz, :fatoff, :fatsz
25
+ words :arm9oloff, :arm9olsz, :arm7oloff, :arm7olsz
26
+ words :romctrl1, :romtcrl2, :iconoff
27
+ half :secureCRC
28
+ half :romctrl3
29
+ words :a9autoloadlist, :a7autoloadlist
30
+ mem :secareadisable, 8
31
+ words :endoff, :headersz
32
+ mem :reserved4, 56
33
+ mem :ninlogo, 156
34
+ half :logoCRC, 0xcf56
35
+ half :headerCRC
36
+ end
37
+
38
# the icon and the titles of the program
class Icon < SerialStruct
  halfs :version, :crc
  mem :reserved, 0x1c
  mem :bitmap, 0x200 # 32x32, 4x4 tiles, each 4x8 bytes, 4bit depth
  mem :palette, 0x20 # 16 colors 16bits 0..0x7fff, 0 transparent (ignored)
  mem :title_jap, 0x100 # 16bit unicode
  mem :title_eng, 0x100
  mem :title_fre, 0x100
  mem :title_ger, 0x100
  mem :title_ita, 0x100
  mem :title_spa, 0x100
  mem :unused, 0x1c0

  # the titles with each 16bit char reduced to one byte
  attr_accessor :title_jap_short, :title_eng_short, :title_fre_short, :title_ger_short, :title_ita_short, :title_spa_short

  # decodes the structure, then for each language:
  # cuts the title at its first null 16bit char (the whole field is kept if there is none)
  # and fills the matching title_<lang>_short
  def decode(exe)
    super(exe)

    %w[jap eng fre ger ita spa].each { |lang|
      raw = instance_variable_get("@title_#{lang}")
      uchrs = raw.unpack('v*')
      # uchrs holds Integers: look for 0, as ?\0 is a String since ruby 1.9 and would never match
      len = uchrs.index(0) || uchrs.length
      title = raw[0, len*2]
      instance_variable_set("@title_#{lang}", title)
      instance_variable_set("@title_#{lang}_short", title.unpack('v*').pack('C*'))
    }
  end
end
66
+
67
# encoders for the 8, 16 and 32 bits integers (byte, half, word), using the file endianness
def encode_byte(val)
  Expression[val].encode(:u8, @endianness)
end

def encode_half(val)
  Expression[val].encode(:u16, @endianness)
end

def encode_word(val)
  Expression[val].encode(:u32, @endianness)
end

# matching decoders, reading from +edata+ (self.encoded by default)
def decode_byte(edata = @encoded)
  edata.decode_imm(:u8, @endianness)
end

def decode_half(edata = @encoded)
  edata.decode_imm(:u16, @endianness)
end

def decode_word(edata = @encoded)
  edata.decode_imm(:u32, @endianness)
end
73
+
74
+
75
+ attr_accessor :header, :icon, :arm9, :arm7
76
+ attr_accessor :files, :fat
77
+
78
# creates an NDS with an empty +encoded+, little endian unless told otherwise
def initialize(endianness=:little)
  @encoded = EncodedData.new
  @endianness = endianness
end
82
+
83
# sets @header to the Header decoded at the current offset of self.encoded
def decode_header
  @header = Header.decode(self)
end
87
+
88
# sets @icon to the Icon decoded at the file offset header.iconoff
def decode_icon
  @encoded.ptr = @header.iconoff
  @icon = Icon.decode(self)
end
92
+
93
# decodes the header and the icon, then extracts the arm9 and arm7 binaries
# each binary gets an export for its entrypoint ('entrypoint' for the arm9,
# 'entrypoint_arm7' for the arm7), at entry - addr from the start of the binary
def decode
  decode_header
  decode_icon
  hdr = @header
  @arm9 = @encoded[hdr.arm9off, hdr.arm9sz]
  @arm7 = @encoded[hdr.arm7off, hdr.arm7sz]
  arm9_entry_off = hdr.arm9entry - hdr.arm9addr
  @arm9.add_export('entrypoint', arm9_entry_off)
  arm7_entry_off = hdr.arm7entry - hdr.arm7addr
  @arm7.add_export('entrypoint_arm7', arm7_entry_off)
end
101
+
102
# decodes the files section and reads the raw fat
# the files section is just the tree structure of a file hierarchy,
# with no indication whatsoever on where to find individual file content
# sets @files to the root directory of the tree
#  directory = array of [hash (subdirname => directory) or string (filename)]
# sets @fat to the raw content of the fat section
# the sections are located through @header, which must be decoded beforehand
def decode_fat
  # the offsets and sizes are header fields
  f = @encoded[@header.fnameoff, @header.fnamesz]
  f.ptr = 0
  # 1st word = size of index subsection
  idxsz = decode_word(f)
  f.ptr = 0
  # index seems to be an array of word, half, half (offset of name, index of name of first file, index of name of first subdir)
  idx = []
  (idxsz/8).times { idx << [decode_word(f), decode_half(f), decode_half(f)] }

  # follows a series of filenames : 1-byte length, name
  # if length has high bit set, name is a directory, content = index[half following the name]
  dirs = idx.map { |off, _idf, _idd|
    f.ptr = off
    entries = []
    while (l = decode_byte(f)) > 0
      name = f.read(l & 0x7f)
      if l & 0x80 > 0
        entries << { name => decode_half(f) }
      else
        entries << name
      end
    end
    entries
  }

  # build the tree: replace each subdirectory index by the directory it designates
  dirs.each { |dir|
    dir.grep(Hash).each { |subdir|
      subdir.each { |name, i| subdir[name] = dirs[i & 0xfff] }
    }
  }

  @files = dirs.first

  @encoded.ptr = @header.fatoff
  @fat = @encoded.read(@header.fatsz)
end
158
+
159
# returns the cpu for this file format: always a new ARM
def cpu_from_headers
  ARM.new
end
162
+
163
# yields the arm9 binary then the arm7 binary, each with its address from the header
def each_section
  hdr = @header
  yield @arm9, hdr.arm9addr
  yield @arm7, hdr.arm7addr
end
167
+
168
# returns the entrypoints of both binaries, arm9 first, as found in the header
def get_default_entrypoints
  hdr = @header
  [hdr.arm9entry, hdr.arm7entry]
end
171
+ end
172
+ end