metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,228 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/main'
8
+ require 'metasm/parse'
9
+ require 'metasm/encode'
10
+ require 'metasm/decode'
11
+ require 'metasm/exe_format/serialstruct'
12
+ require 'metasm/os/main' # VirtualFile
13
+
14
+ module Metasm
15
# Shared base of the executable file formats.
# Provides the generic load / decode / assemble / compile / disassemble / encode
# entry points. The format-specific work (+decode+, +decode_header+, +encode+,
# +assemble+, +each_section+, +cpu+, +encoded+...) is not defined in this part
# of the class and is expected to come from elsewhere (subclasses or other files).
class ExeFormat
  # creates a new instance (extra arguments and block are forwarded to +new+),
  # and populates self.encoded with the supplied string
  # an EncodedData replaces +encoded+, anything else is appended to it
  def self.load(str, *a, &b)
    e = new(*a, &b)
    if str.kind_of? EncodedData
      e.encoded = str
    else
      e.encoded << str
    end
    e
  end

  # same as load, used by AutoExe
  def self.autoexe_load(*x, &b)
    load(*x, &b)
  end

  # path of the file this object was loaded from (set by +load_file+)
  attr_accessor :filename

  # same as +load+, but from a file
  # the file content is read through VirtualFile
  # +filename+ is set to +path+ unless it already has a value
  def self.load_file(path, *a, &b)
    e = load(VirtualFile.read(path), *a, &b)
    e.filename ||= path
    e
  end

  # +load_file+ then decode
  # the decode step is skipped when the loaded object already has
  # a @disassembler instance variable
  def self.decode_file(path, *a, &b)
    e = load_file(path, *a, &b)
    e.decode unless e.instance_variable_defined?(:@disassembler)
    e
  end

  # +load_file+ then decode header
  def self.decode_file_header(path, *a, &b)
    e = load_file(path, *a, &b)
    e.decode_header
    e
  end

  # +load+ then decode, returns the new object
  def self.decode(raw, *a, &b)
    e = load(raw, *a, &b)
    e.decode
    e
  end

  # +load+ then decode header, returns the new object
  def self.decode_header(raw, *a, &b)
    e = load(raw, *a, &b)
    e.decode_header
    e
  end

  # creates a new object using the specified cpu, parses the asm source, and assemble
  # the +cpu+ and +source+ arguments may be given in either order
  def self.assemble(cpu, source, file='<unk>', lineno=1)
    source, cpu = cpu, source if source.kind_of? CPU
    e = new(cpu)
    e.assemble(source, file, lineno)
    e
  end

  # same as #assemble, reads asm source from the specified file
  # the +cpu+ and +filename+ arguments may be given in either order
  def self.assemble_file(cpu, filename)
    filename, cpu = cpu, filename if filename.kind_of? CPU
    assemble(cpu, File.read(filename), filename, 1)
  end

  # parses a bunch of standalone C code, compile and assemble it
  # the generated asm source is printed when $DEBUG is set
  # returns the value of +c_set_default_entrypoint+
  def compile_c(source, file='<unk>', lineno=1)
    cp = @cpu.new_cparser
    tune_cparser(cp)
    cp.parse(source, file, lineno)
    read_c_attrs cp if respond_to? :read_c_attrs
    asm_source = @cpu.new_ccompiler(cp, self).compile
    puts asm_source if $DEBUG
    assemble(asm_source, 'C compiler output', 1)
    c_set_default_entrypoint
  end

  # creates a new object using the specified cpu, parse/compile/assemble the C source
  # the +cpu+ and +source+ arguments may be given in either order
  def self.compile_c(cpu, source, file='<unk>', lineno=1)
    source, cpu = cpu, source if source.kind_of? CPU
    e = new(cpu)
    e.compile_c(source, file, lineno)
    e
  end

  # same as compile_c, reads the C source from the specified file
  # the +cpu+ and +filename+ arguments may be given in either order
  def self.compile_c_file(cpu, filename)
    filename, cpu = cpu, filename if filename.kind_of? CPU
    compile_c(cpu, File.read(filename), filename, 1)
  end

  # add directive to change the current assembler section to the assembler source +src+
  # the default implementation simply appends +section+ to +src+
  def compile_setsection(src, section)
    src << section
  end

  # prepare a preprocessor before it reads any source, should define macros to identify the fileformat
  # does nothing by default
  def tune_prepro(l)
  end

  # prepare a cparser: by default only tunes its lexer through +tune_prepro+
  def tune_cparser(cp)
    tune_prepro(cp.lexer)
  end

  # this is called once C code is parsed, to handle C attributes like export/import/init etc
  # does nothing by default
  def read_c_attrs(cp)
  end

  # should setup a default entrypoint for C code, including preparing args for main() etc
  # does nothing by default
  def c_set_default_entrypoint
  end

  attr_writer :disassembler	# custom reader
  # returns the disassembler, creating and initializing it on first use
  def disassembler
    @disassembler ||= init_disassembler
  end

  # returns the exe disassembler
  # if it does not exist, creates one, and feeds it with the exe sections
  # a section yielded without data is registered with an empty EncodedData
  def init_disassembler
    @disassembler ||= Disassembler.new(self)
    @disassembler.cpu ||= cpu
    each_section { |edata, base|
      edata ||= EncodedData.new
      @disassembler.add_section edata, base
    }
    @disassembler
  end

  # disassembles the specified entrypoints
  # initializes the disassembler if needed
  # uses get_default_entrypoints if the argument list is empty
  # returns the disassembler
  def disassemble(*entrypoints)
    entrypoints = get_default_entrypoints if entrypoints.empty?
    disassembler.disassemble(*entrypoints)
    @disassembler
  end

  # disassembles the specified entrypoints without backtracking
  # initializes the disassembler if needed
  # uses get_default_entrypoints if the argument list is empty
  # returns the disassembler
  def disassemble_fast_deep(*entrypoints)
    entrypoints = get_default_entrypoints if entrypoints.empty?
    disassembler.disassemble_fast_deep(*entrypoints)
    @disassembler
  end

  # returns a list of entrypoints to disassemble (program entrypoint, exported functions...)
  # empty by default
  def get_default_entrypoints
    []
  end

  # encodes the executable as a string, checks that all relocations are
  # resolved, and returns the raw string version
  # raises a RuntimeError listing the unresolved relocation targets otherwise
  def encode_string(*a)
    encode(*a)
    raise ["Unresolved relocations:", @encoded.reloc.map { |o, r| "#{r.target} " + (Backtrace.backtrace_str(r.backtrace) if r.backtrace).to_s }].join("\n") if not @encoded.reloc.empty?
    @encoded.data
  end

  # saves the result of +encode_string+ in the specified file (binary mode)
  # an existing file is overwritten: the existence check below is disabled
  def encode_file(path, *a)
    #raise Errno::EEXIST, path if File.exist? path	# race, but cannot use O_EXCL, as O_BINARY is not defined in ruby
    encode_string(*a)
    File.open(path, 'wb') { |fd| fd.write(@encoded.data) }
  end

  # returns the file offset where a mapped byte comes from
  # identity mapping by default
  def addr_to_fileoff(addr)
    addr
  end

  # returns the address at which a given file offset would be mapped
  # identity mapping by default
  def fileoff_to_addr(foff)
    foff
  end

  # lowercased class name, without its namespace
  def shortname
    self.class.name.split('::').last.downcase
  end

  # conversions between numeric values and constant names, using a
  # { numeric value => name } hash
  module IntToHash
    # converts a constant name to its numeric value using the hash
    # {1 => 'toto', 2 => 'tata'}: 'toto' => 1, 42 => 42, 'tutu' => raise
    def int_from_hash(val, hash)
      # Hash#key replaces Hash#index, which no longer exists in recent rubies
      found = hash.key(val)
      return found if found
      return val if val.kind_of?(Integer)
      raise "unknown constant #{val.inspect}"
    end

    # converts an array of flag constants to its numeric value using the hash
    # {1 => 'toto', 2 => 'tata'}: ['toto', 'tata'] => 3, 'toto' => 1, 42 => 42
    def bits_from_hash(val, hash)
      if val.kind_of?(Array)
        val.inject(0) { |val_, bitname| val_ | int_from_hash(bitname, hash) }
      else
        int_from_hash(val, hash)
      end
    end

    # converts a numeric value to the corresponding constant name using the hash
    # {1 => 'toto', 2 => 'tata'}: 1 => 'toto', 42 => 42, 'tata' => 'tata', 'tutu' => raise
    def int_to_hash(val, hash)
      return hash.fetch(val, val) if val.kind_of?(Integer)
      # a name is returned unchanged, provided it is known to the hash
      raise "unknown constant #{val.inspect}" if not hash.key(val)
      val
    end

    # converts a numeric value to the corresponding array of constant flag names using the hash
    # the bits that match no flag are appended as a number, unless they are 0
    # {1 => 'toto', 2 => 'tata'}: 5 => ['toto', 4]
    def bits_to_hash(val, hash)
      names =
        if val.kind_of?(Integer)
          # each matched flag is cleared from val, so val ends up holding the unknown bits
          hash.find_all { |k, v| (val & k) == k && (val &= ~k) }.map { |k, v| v } << val
        elsif val.kind_of?(Array)
          val.map { |e| int_to_hash(e, hash) }
        else
          [int_to_hash(val, hash)]
        end
      names - [0]
    end
  end
  include IntToHash
end
224
+
225
# make the constant name <-> numeric value helpers available to every SerialStruct
class SerialStruct
  include ExeFormat::IntToHash
end
228
+ end
@@ -0,0 +1,164 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+ require 'metasm/encode'
9
+ require 'metasm/decode'
10
+
11
+ module Metasm
12
# MZ executable file format: a header, a relocation table and a body
class MZ < ExeFormat
  MAGIC = 'MZ'	# 0x4d5a

  # the file header: 2 bytes of signature, 13 words, 4 trailing bytes
  class Header < SerialStruct
    mem :magic, 2, MAGIC
    words :cblp, :cp, :crlc, :cparhdr, :minalloc, :maxalloc, :ss, :sp, :csum, :ip, :cs, :lfarlc, :ovno
    mem :unk, 4

    # encodes the header of +mz+, returns a new EncodedData
    # +relocs+ is the encoded relocation table, used to compute the default field values
    def encode(mz, relocs)
      h = EncodedData.new
      set_default_values mz, h, relocs
      h << super(mz)
    end

    # fills the fields that were not set explicitly
    # +h+ is the EncodedData that will hold the encoded header, +relocs+ the encoded relocation table
    # does nothing at all when +h+ is missing
    def set_default_values(mz, h=nil, relocs=nil)
      return if not h
      @cblp ||= Expression[[mz.label_at(mz.body, mz.body.virtsize), :-, mz.label_at(h, 0)], :%, 512]	# number of bytes used in last page
      @cp ||= Expression[[mz.label_at(mz.body, mz.body.virtsize), :-, mz.label_at(h, 0)], :/, 512]	# number of pages used
      @crlc ||= relocs.virtsize/4	# number of relocations (each one is encoded as 2 words)
      @cparhdr ||= Expression[[mz.label_at(relocs, 0), :-, mz.label_at(h, 0)], :/, 16]	# header size in paragraphs (16 bytes each)
      @minalloc ||= ((mz.body.virtsize - mz.body.rawsize) + 15) / 16	# uninitialized part of the body, in paragraphs, rounded up
      @maxalloc ||= @minalloc
      @sp ||= 0		# ss:sp points at 1st byte of body => works if body does not reach end of segment (or maybe the overflow make the stack go to header space)
      @lfarlc ||= Expression[mz.label_at(relocs, 0), :-, mz.label_at(h, 0)]	# offset of the relocation table from the start of the header

      super(mz)
    end

    # decodes the header from +mz+
    # raises InvalidExeFormat if the signature is not MAGIC
    def decode(mz)
      super(mz)
      # report the decoded signature (this used to reference an undefined local)
      raise InvalidExeFormat, "Invalid MZ signature #{@magic.inspect}" if @magic != MAGIC
    end
  end

  # an entry of the relocation table
  class Relocation < SerialStruct
    words :offset, :segment
  end


  # encodes a word in 16 bits
  def encode_word(val) Expression[val].encode(:u16, @endianness) end
  # decodes a 16bits word from self.encoded
  def decode_word(edata = @encoded) edata.decode_imm(:u16, @endianness) end


  attr_accessor :endianness, :header, :source
  # the EncodedData representing the content of the file
  attr_accessor :body
  # an array of Relocations - quite obscure
  attr_accessor :relocs

  # the endianness is taken from +cpu+ when given, and is little-endian otherwise
  def initialize(cpu=nil)
    @endianness = cpu ? cpu.endianness : :little
    @relocs = []
    @header = Header.new
    @body = EncodedData.new
    @source = []
    super(cpu)
  end

  # assembles the source in the body, clears the source
  # the arguments, if any, are first handed to +parse+
  def assemble(*a)
    parse(*a) if not a.empty?
    @body << assemble_sequence(@source, @cpu)
    @body.fixup @body.binding
    # XXX should create @relocs here
    @source.clear
  end

  # sets up @cursource
  def parse_init
    @cursource = @source
    super()
  end

  # encodes the header and the relocation table, return them in an array, with the body.
  def pre_encode
    relocs = @relocs.inject(EncodedData.new) { |edata, r| edata << r.encode(self) }
    header = @header.encode self, relocs
    [header, relocs, @body]
  end

  # defines the exe-specific parser instructions:
  # .entrypoint [<label>]: defines the program entrypoint to label (or create a new label at this location)
  # the header ip is set to the entrypoint, relative to the start of the body
  def parse_parser_instruction(instr)
    case instr.raw.downcase
    when '.entrypoint'
      # ".entrypoint <somelabel/expression>" or ".entrypoint" (here)
      @lexer.skip_space
      if tok = @lexer.nexttok and tok.type == :string
        raise instr, 'syntax error' if not entrypoint = Expression.parse(@lexer)
      else
        entrypoint = new_label('entrypoint')
        @cursource << Label.new(entrypoint, instr.backtrace.dup)
      end
      @header.ip = Expression[entrypoint, :-, label_at(@body, 0, 'body')]
      @lexer.skip_space
      raise instr, 'eol expected' if t = @lexer.nexttok and t.type != :eol
    end
  end


  # concats the header, relocation table and body
  # then resolves the labels and updates the checksum
  def encode
    pre_encode.inject(@encoded) { |edata, pe| edata << pe }
    @encoded.fixup @encoded.binding
    encode_fix_checksum
  end

  # sets the file checksum (untested)
  # the header is decoded back from the encoded file, the words of the file are
  # summed (leaving out the current checksum value), and the header is re-encoded in place
  def encode_fix_checksum
    @encoded.ptr = 0
    decode_header
    mzlen = @header.cp * 512 + @header.cblp
    @encoded.ptr = 0
    csum = -@header.csum
    (mzlen/2).times { csum += decode_word }
    csum &= 0xffff
    @header.csum = csum
    hdr = @header.encode(self, nil)
    @encoded[0, hdr.length] = hdr
  end

  # decodes the MZ header from the current offset in self.encoded
  def decode_header
    @header.decode self
  end

  # decodes the relocation table
  # needs the header to be decoded (location and number of entries)
  def decode_relocs
    @relocs.clear
    @encoded.ptr = @header.lfarlc
    @header.crlc.times { @relocs << Relocation.decode(self) }
  end

  # decodes the main part of the program
  # mostly defines the 'start' export, to point to the MZ entrypoint
  # NOTE(review): the end of the body is taken as cp*512 + cblp, which matches what
  # Header#set_default_values generates - confirm against the MZ specification
  # whether the page count is meant to include the last partial page
  def decode_body
    @body = @encoded[@header.cparhdr*16...@header.cp*512+@header.cblp]
    @body.virtsize += @header.minalloc * 16
    @body.add_export 'start', @header.cs * 16 + @header.ip
  end

  # decodes the header, the relocation table and the body
  def decode
    decode_header
    decode_relocs
    decode_body
  end

  # the body is the only section, based at address 0
  def each_section
    yield @body, 0
  end
end
164
+ end
@@ -0,0 +1,172 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+
7
+ require 'metasm/exe_format/main'
8
+ require 'metasm/encode'
9
+ require 'metasm/decode'
10
+
11
+
12
+ module Metasm
13
# Nintendo DS executable file format
class NDS < ExeFormat
  # the file header
  # in this format a 'word' is 32 bits and a 'half' is 16 bits (see encode_word/encode_half)
  class Header < SerialStruct
    str :title, 12
    str :code, 4
    str :maker, 2
    bytes :unitcode, :encryptionselect, :devicetype
    mem :reserved1, 9
    bytes :version, :autostart
    words :arm9off, :arm9entry, :arm9addr, :arm9sz
    words :arm7off, :arm7entry, :arm7addr, :arm7sz
    words :fnameoff, :fnamesz, :fatoff, :fatsz
    words :arm9oloff, :arm9olsz, :arm7oloff, :arm7olsz
    # NOTE(review): 'romtcrl2' looks like a typo for 'romctrl2'; kept, as it names the accessor
    words :romctrl1, :romtcrl2, :iconoff
    half :secureCRC
    half :romctrl3
    words :a9autoloadlist, :a7autoloadlist
    mem :secareadisable, 8
    words :endoff, :headersz
    mem :reserved4, 56
    mem :ninlogo, 156
    half :logoCRC, 0xcf56
    half :headerCRC
  end

  # the icon/title block, found at the +iconoff+ offset of the header
  class Icon < SerialStruct
    halfs :version, :crc
    mem :reserved, 0x1c
    mem :bitmap, 0x200	# 32x32, 4x4 tiles, each 4x8 bytes, 4bit depth
    mem :palette, 0x20	# 16 colors 16bits 0..0x7fff, 0 transparent (ignored)
    mem :title_jap, 0x100	# 16bit unicode
    mem :title_eng, 0x100
    mem :title_fre, 0x100
    mem :title_ger, 0x100
    mem :title_ita, 0x100
    mem :title_spa, 0x100
    mem :unused, 0x1c0

    # the titles with each 16bit character reduced to one byte
    attr_accessor :title_jap_short, :title_eng_short, :title_fre_short, :title_ger_short, :title_ita_short, :title_spa_short

    # decodes the structure, then cuts each title at its first null character
    # and generates the corresponding title_<lang>_short
    def decode(exe)
      super(exe)

      %w[jap eng fre ger ita spa].each { |lang|
        str = instance_variable_get("@title_#{lang}")
        uchrs = str.unpack('v*')
        # unpack yields integers, so the terminator must be searched as the integer 0
        # a title with no terminator is kept whole
        str = str[0, (uchrs.index(0) || uchrs.length)*2]
        instance_variable_set("@title_#{lang}", str)
        str = str.unpack('v*').pack('C*')
        instance_variable_set("@title_#{lang}_short", str)
      }
    end
  end

  # encoders/decoders for the 8, 16 and 32bits integers; the decoders read from self.encoded by default
  def encode_byte(val) Expression[val].encode(:u8, @endianness) end
  def encode_half(val) Expression[val].encode(:u16, @endianness) end
  def encode_word(val) Expression[val].encode(:u32, @endianness) end
  def decode_byte(edata = @encoded) edata.decode_imm(:u8, @endianness) end
  def decode_half(edata = @encoded) edata.decode_imm(:u16, @endianness) end
  def decode_word(edata = @encoded) edata.decode_imm(:u32, @endianness) end


  attr_accessor :header, :icon, :arm9, :arm7
  attr_accessor :files, :fat

  # NOTE(review): takes an endianness and does not call super, unlike MZ which
  # takes a cpu - presumably intentional, confirm against ExeFormat#initialize
  def initialize(endianness=:little)
    @endianness = endianness
    @encoded = EncodedData.new
  end

  # decodes the header from the current offset in self.encoded
  def decode_header
    @header = Header.decode(self)
  end

  # decodes the icon block, located using the header
  def decode_icon
    @encoded.ptr = @header.iconoff
    @icon = Icon.decode(self)
  end

  # decodes the header and the icon, then extracts the arm9 and arm7 programs
  # and exports their entrypoints
  def decode
    decode_header
    decode_icon
    @arm9 = @encoded[@header.arm9off, @header.arm9sz]
    @arm7 = @encoded[@header.arm7off, @header.arm7sz]
    @arm9.add_export('entrypoint', @header.arm9entry - @header.arm9addr)
    @arm7.add_export('entrypoint_arm7', @header.arm7entry - @header.arm7addr)
  end

  # decodes the file name table into @files and reads the raw fat into @fat
  # needs the header to be decoded: both tables are located through it
  def decode_fat
    # decode the files section
    # it is just the tree structure of a file hierarchy
    # no indication whatsoever on where to find individual file content
    f = @encoded[@header.fnameoff, @header.fnamesz]
    f.ptr = 0
    idx = []
    # 1st word = size of index subsection
    idxsz = decode_word(f)
    f.ptr = 0
    # index seems to be an array of word, half, half (offset of name, index of name of first file, index of name of first subdir)
    (idxsz/8).times { idx << [decode_word(f), decode_half(f), decode_half(f)] }
    # follows a series of filenames : 1-byte length, name
    # if length has high bit set, name is a directory, content = index[half following the name]
    dat = []
    idx.each { |off, idf, idd|
      f.ptr = off
      dat << []
      while (l = decode_byte(f)) > 0
        name = f.read(l&0x7f)
        if l & 0x80 > 0
          i = decode_half(f)
          dat.last << { name => i.to_s(16) }
        else
          dat.last << name
        end
      end
    }

    # build the tree from the serialized data
    # directory = array of [hash (subdirname => directory) or string (filename)]
    tree = dat.map { |dt| dt.map { |d| d.dup } }
    tree.each { |br|
      br.grep(Hash).each { |b|
        b.each { |k, v| b[k] = tree[v.to_i(16) & 0xfff] }
      }
    }
    tree = tree.first

    # flatten the tree to a list of fullpath
    # currently unused, see the commented alternative below
    iter = lambda { |ar, cur|
      ret = []
      ar.each { |elem|
        case elem
        when Hash; ret.concat iter[elem.values.first, cur + elem.keys.first + '/']
        else ret << (cur + elem)
        end
      }
      ret
    }

    @files = tree #iter[tree, '/']

    encoded.ptr = @header.fatoff
    @fat = encoded.read(@header.fatsz)
  end

  # the cpu is always an ARM
  def cpu_from_headers
    ARM.new
  end

  # yields the arm9 then the arm7 program, with their load address
  def each_section
    yield @arm9, @header.arm9addr
    yield @arm7, @header.arm7addr
  end

  # the entrypoints of both programs
  def get_default_entrypoints
    [@header.arm9entry, @header.arm7entry]
  end
end
172
+ end