wilson 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2009-01-20
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birfday!
6
+
data/Manifest.txt ADDED
@@ -0,0 +1,6 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/wilson.rb
6
+ test/test_wilson.rb
data/README.txt ADDED
@@ -0,0 +1,60 @@
1
+ = wilson
2
+
3
+ * http://rubyforge.org/projects/seattlerb
4
+
5
+ == DESCRIPTION:
6
+
7
+ Wilson is a pure ruby x86 assembler. No, really. Worst Idea Evar.
8
+
9
+ Why "wilson"? I wanted to name it "metal", but there is an existing
10
+ project with that name... So I'm naming it after Wilson Bilkovich, who
11
+ is about as metal as you can get (and it is easier to spell than
12
+ "bilkovich", even tho that sounds more metal).
13
+
14
+ == FEATURES/PROBLEMS:
15
+
16
+ * Generates x86 machine code directly, with no dependencies.
17
+ * Still has problems with jumps, I need smart ppl to help me.
18
+
19
+ == SYNOPSIS:
20
+
21
+ class X
22
+ defasm :superfast_meaning_of_life do
23
+ eax.mov 42.r # (42 << 1) + 1
24
+ end
25
+ end
26
+
27
+ p X.new.superfast_meaning_of_life # => 42
28
+
29
+ == REQUIREMENTS:
30
+
31
+ * rubygems
32
+
33
+ == INSTALL:
34
+
35
+ * sudo gem install wilson
36
+
37
+ == LICENSE:
38
+
39
+ (The MIT License)
40
+
41
+ Copyright (c) 2008 Ryan Davis, Seattle.rb
42
+
43
+ Permission is hereby granted, free of charge, to any person obtaining
44
+ a copy of this software and associated documentation files (the
45
+ 'Software'), to deal in the Software without restriction, including
46
+ without limitation the rights to use, copy, modify, merge, publish,
47
+ distribute, sublicense, and/or sell copies of the Software, and to
48
+ permit persons to whom the Software is furnished to do so, subject to
49
+ the following conditions:
50
+
51
+ The above copyright notice and this permission notice shall be
52
+ included in all copies or substantial portions of the Software.
53
+
54
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
55
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
56
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
57
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
58
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
59
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
60
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,71 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require './lib/wilson.rb'
6
+
7
# Project definition. The resulting Hoe instance is kept in +h+ because
# the test:prof task further down asks it for the test command line.
h = Hoe.new('wilson', Wilson::VERSION) { |spec|
  spec.rubyforge_name = 'seattlerb'
  spec.developer 'Ryan Davis', 'ryand-ruby@zenspider.com'
}
11
+
12
class Hoe
  ##
  # Builds the command line that loads rubygems, the test library and
  # every file matched by test_globs.
  #
  # Returns a two element array: the name of the method that should run
  # the command (:ruby, or :sh when a +flavor+ wrapper is given) and the
  # command string itself. When +flavor+ is given it is prepended to the
  # command. As a side effect ENV['EXCLUDED_VERSIONS'] is set from
  # multiruby_skip.

  def test_cmd flavor = nil
    libraries = ["rubygems", testlib]
    test_globs.each { |glob| libraries.concat Dir.glob(glob) }

    requires = libraries.map { |lib| %Q(require "#{lib}") }.join("; ")
    command  = "#{RUBY_FLAGS} -e '#{requires}' #{FILTER}"

    ENV['EXCLUDED_VERSIONS'] = multiruby_skip.join(":")

    if flavor then
      [:sh, "#{flavor} #{command}"]
    else
      [:ruby, command]
    end
  end
end
27
+
28
+ # def run_tests(multi=false) # :nodoc:
29
+ # msg = multi ? :sh : :ruby
30
+ # cmd = if test ?f, 'test/test_all.rb' then
31
+ # "#{RUBY_FLAGS} test/test_all.rb #{FILTER}"
32
+ # else
33
+ # tests = ["rubygems", self.testlib] +
34
+ # test_globs.map { |g| Dir.glob(g) }.flatten
35
+ # tests.map! {|f| %Q(require "#{f}")}
36
+ # "#{RUBY_FLAGS} -e '#{tests.join("; ")}' #{FILTER}"
37
+ # end
38
+ #
39
+ # excludes = multiruby_skip.join(":")
40
+ # ENV['EXCLUDED_VERSIONS'] = excludes
41
+ # cmd = "multiruby #{cmd}" if multi
42
+ #
43
+ # send msg, cmd
44
+ # end
45
+
46
namespace :test do
  desc "profiles your tests"
  task(:prof) do
    # test_cmd answers [runner method, command line]
    runner, command_line = h.test_cmd(:zenprofile)
    send runner, command_line
  end

  desc "rcov your tests"
  task(:rcov) { raise "not yet" }
end
57
+
58
# Coverage task; silently unavailable when rcov is not installed.
begin
  require 'rcov/rcovtask'

  Rcov::RcovTask.new { |rcov|
    # PATTERN may narrow the set of test files that get measured
    rcov.test_files = FileList[ENV['PATTERN'] || 'test/test_*.rb']
    rcov.verbose    = true

    rcov.rcov_opts << "--threshold 80"
    rcov.rcov_opts << "--no-color"
  }
rescue LoadError
  # rcov missing: no coverage task is defined
end
71
+ # vim: syntax=Ruby
data/lib/wilson.rb ADDED
@@ -0,0 +1,4684 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'dl'
4
+ require 'dl/import'
5
+
6
##
# Thin DL binding onto the running interpreter's C API. Only
# rb_define_method is imported; Module#defasm uses it to install
# generated machine code as a method body.

module Ruby
  extend DL::Importable

  # Ruby objects are handed to C as "unsigned long"; the conversion proc
  # passes object_id << 1.
  # NOTE(review): presumably this recovers the object's VALUE under the
  # MRI this was written for -- confirm before relying on it elsewhere.
  typealias("VALUE", "unsigned long", proc { |object| object.object_id << 1 })

  dlload("libruby.dylib")

  extern("void rb_define_method(VALUE, char*, void*, int)")
end
15
+
16
class Object
  ##
  # Returns the receiver's object_id unchanged. Integer supplies its own
  # version of this method.

  def r
    object_id
  end
end
21
+
22
class Integer
  ##
  # Returns twice the receiver's object_id, plus one.

  def r
    object_id * 2 + 1
  end

  ##
  # Integers print in hexadecimal with a "0x" prefix.

  def inspect
    "0x" + to_s(16)
  end
end
31
+
32
class Module
  # Every generated code string is also appended here.
  # NOTE(review): presumably so the bytes handed to rb_define_method are
  # never garbage collected -- confirm.
  @@asm = []

  ##
  # Defines instance method +name+ on the receiver from x86 assembly.
  # The block is instance_eval'd against a fresh Wilson::MachineCodeX86,
  # wrapped between a "push ebp / mov ebp, esp" prologue and a
  # "leave / ret" epilogue. If the block emits no bytes at all, a warning
  # is printed and "mov eax, 4" is emitted in its place (the "return nil"
  # case). Extra +args+ are accepted but unused.

  def defasm name, *args, &block
    machine = Wilson::MachineCodeX86.new

    # prologue (TODO?: also save esi/edi)
    machine.ebp.push
    machine.ebp.mov machine.esp

    prologue_length = machine.stream.size

    machine.instance_eval(&block)

    body_missing = machine.stream.size == prologue_length
    if body_missing then
      warn "returning nil for #{self}##{name}"
      machine.eax.mov 4
    end

    # epilogue
    machine.leave
    machine.ret

    bytes = machine.stream.pack("C*")
    @@asm << bytes

    Ruby.rb_define_method self, name.to_s, bytes, 0
  end
end
65
+
66
class Object
  ##
  # Marker for abstract methods: always raises.

  def subclass_responsibility
    raise "subclass responsibility"
  end

  ##
  # The default answer to every operand-kind predicate below.

  def no!
    false
  end

  # Every object answers false to these unless its class overrides them.
  [:address?, :future_label?, :immediate?, :immediate_value?, :label?,
   :offset?, :operand?, :register?, :special_register?].each do |predicate|
    alias_method predicate, :no!
  end
end
80
+
81
class Integer
  ##
  # Wraps the receiver in a fresh Wilson::Address whose offset is the
  # receiver.

  def m
    memory = Wilson::Address.new
    memory.offset = self
    memory
  end

  ##
  # Integers are usable as immediate values.

  def immediate_value?
    true
  end
end
92
+
93
class Array
  ##
  # The element at index 1, or nil when there is none.

  def second
    at 1
  end

  ##
  # Appends +integer+ as four bytes, least significant byte first.

  def push_D integer
    bytes = [integer].pack("V").unpack("C4")
    push(*bytes)
  end

  ##
  # Appends the low byte of +integer+.

  def push_B integer
    push(integer & 255)
  end

  ##
  # Appends the low 16 bits of +integer+ as two bytes, least significant
  # byte first.

  def push_W integer
    low  = integer & 255
    high = (integer >> 8) & 255
    self << low << high
  end
end
110
+
111
+ module Wilson
112
+ VERSION = '1.0.0'
113
+
114
+ ##
115
+ # Assembler parses the NASM documentation and creates Command
116
+ # objects for it
117
+
118
+ class Assembler
119
+ attr_accessor :commands
120
+
121
+ def self.nasm_fixes
122
+ # TODO: extend parser to split /[,:]/ and remove some of these
123
+ '
124
+ CALL imm,imm16 ; o16 9A iw iw [8086]
125
+ CALL imm,imm32 ; o32 9A id iw [386]
126
+ CALLFAR mem16 ; o16 FF /3 [8086]
127
+ CALLFAR mem32 ; o32 FF /3 [386]
128
+
129
+ Jcc imm ; 0F 80+cc rw/rd [386]
130
+
131
+ JMP imm,imm16 ; o16 EA iw iw [8086]
132
+ JMP imm,imm32 ; o32 EA id iw [386]
133
+ JMP imm16 ; E9 rw/rd [8086]
134
+ JMP imm32 ; E9 rw/rd [8086]
135
+ JMP imm8 ; EB rb [8086]
136
+ JMPFAR mem16 ; o16 FF /5 [8086]
137
+ JMPFAR mem32 ; o32 FF /5 [386]
138
+
139
+ FADDTO fpureg ; DC C0+r [8086,FPU]
140
+ FDIVTO fpureg ; DC F8+r [8086,FPU]
141
+ FDIVRTO fpureg ; DC F0+r [8086,FPU]
142
+ FMULTO fpureg ; DC C8+r [8086,FPU]
143
+ FSUBTO fpureg ; DC E8+r [8086,FPU]
144
+ FSUBRTO fpureg ; DC E0+r [8086,FPU]
145
+ '
146
+ end
147
+
148
+ def self.nasm
149
+ File.read(__FILE__).split(/__END__/).last
150
+ end
151
+
152
+ @@default = nil
153
+
154
+ def self.default
155
+ @@default ||= self.new.parse
156
+ end
157
+
158
+ def self.default= o
159
+ @@default = o
160
+ end
161
+
162
+ def self.commands
163
+ self.default.commands
164
+ end
165
+
166
+ def self.parse
167
+ self.new.parse
168
+ end
169
+
170
+ def initialize
171
+ self.commands = []
172
+ end
173
+
174
+ def expand_parameters command
175
+ command.parameters.each_with_index do |parameter, index|
176
+ if String === parameter && parameter =~ /^r\/m(\d+)/ then
177
+ bits = $1.to_i
178
+ newCommand = command.dup
179
+ commands << newCommand
180
+ case bits
181
+ when 8, 16, 32 then
182
+ command.parameters[index] = MemoryRegister.new bits
183
+ newCommand.parameters[index] = Address.new false, bits
184
+ when 64 then
185
+ command.parameters[index] = MMXRegister.new bits
186
+ newCommand.parameters[index] = Address.new false, bits
187
+ end
188
+ end
189
+ end
190
+ end
191
+
192
+ def add_conditional_commands prototype
193
+ prototype.opcode = prototype.opcode[0..-3]
194
+
195
+ self.conditionals.each do |conditional, value|
196
+ command = prototype.dup
197
+ command.opcode += conditional
198
+
199
+ command.opcodes.each_with_index do |op, index|
200
+ command.opcodes[index] = ($1.hex+value).to_s(16) if op =~ /(.*)\+cc$/
201
+ end
202
+
203
+ self.add_command command
204
+ end
205
+ end
206
+
207
+ def process_line line # TODO: remove
208
+ return if line.empty?
209
+ return unless line =~ /^[A-Z].+;.*\[/
210
+
211
+ self.parse_command line
212
+ end
213
+
214
+ def add_command command
215
+ return self.add_conditional_commands(command) if command.opcode =~ /cc$/i
216
+ self.commands << command
217
+ self.expand_parameters command
218
+ end
219
+
220
+ def conditionals
221
+ @conditionals ||= {
222
+ 'O' => 0, 'NO' => 1, 'B' => 2, 'C' => 2, 'NAE' => 2,
223
+ 'AE' => 3, 'NB' => 3, 'NC' => 3, 'E' => 4, 'Z' => 4,
224
+ 'NE' => 5, 'NZ' => 5, 'BE' => 6, 'NA' => 6, 'A' => 7,
225
+ 'NBE' => 7, 'S' => 8, 'NS' => 9, 'P' => 10, 'PE' => 10,
226
+ 'NP' => 11, 'PO' => 11, 'L' => 12, 'NGE' => 12, 'GE' => 13,
227
+ 'NL' => 13, 'LE' => 14, 'NG' => 14, 'G' => 15, 'NLE' => 15,
228
+ }
229
+ end
230
+
231
+ def parse_command line
232
+ if line =~ /^(\w+)\s+([^;]*)\s+;\s+([^\[]+)\s+\[([\w,]+)\]/ then
233
+ name, params, ops, procs = $1, $2, $3, $4
234
+
235
+ command = Command.new
236
+ command.opcode = name
237
+ command.opcodes = ops.split
238
+ command.processors = procs.split(/,/)
239
+
240
+ command.initialize_parameters params.strip
241
+
242
+ self.add_command command
243
+ else
244
+ raise "unparsed: #{line}"
245
+ end
246
+ end
247
+
248
+ def parse
249
+ (self.class.nasm + self.class.nasm_fixes).each_line do |line|
250
+ self.process_line line.strip.sub(/^# /, '')
251
+ end
252
+ self
253
+ end
254
+ end
255
+
256
+ ##
257
+ # Command is a potential command you can call. It has an
258
+ # opcode (eg: MOV) and the memory format that it outputs as
259
+ # (opcodes) as well as the kinds of parameters it takes and the
260
+ # processor types that support the command.
261
+
262
+ class Command
263
+ attr_accessor :opcode, :parameters, :opcodes, :processors
264
+
265
+ def dup
266
+ x = super
267
+ x.parameters = x.parameters.dup
268
+ x.opcodes = x.opcodes.dup
269
+ x
270
+ end
271
+
272
+ # TODO: learn this better, and figure out why not polymorphic ==
273
+ def parameter_matches a, b
274
+ return false if String === b
275
+
276
+ if a.register? && b.register? then
277
+ return a.bits == b.bits && (b.id.nil? || a.id == b.id)
278
+ end
279
+
280
+ if a.address? && b.address? then
281
+ return ! b.offset? || a.offset?
282
+ end
283
+
284
+ if a.special_register? && b.special_register? then
285
+ return a.class == b.class && (b.id.nil? || a.id == b.id)
286
+ end
287
+
288
+ return false unless b.immediate?
289
+
290
+ if a.immediate_value? then
291
+ return (b.value && b.value == a) || b.bits.nil? || a < (2 ** b.bits)
292
+ end
293
+
294
+ if a.label? then
295
+ return a.future_label? ? b.bits == a.machine.bits :
296
+ a.bits <= (b.bits || a.machine.bits)
297
+ end
298
+
299
+ false
300
+ end
301
+
302
+ def instruction_applies? instruction
303
+ return false if instruction.opcode != self.opcode
304
+ return false if instruction.parameters.size != self.parameters.size
305
+
306
+ instruction.parameters.zip(self.parameters).all? { |a, b|
307
+ self.parameter_matches a, b
308
+ }
309
+ end
310
+
311
+ def to_parameter parameter
312
+ case parameter
313
+ when 'r/m8' then return parameter # "Expanded by the parser"
314
+ when 'r/m16' then return parameter # "Expanded by the parser"
315
+ when 'r/m32' then return parameter # "Expanded by the parser"
316
+ when 'r/m64' then return parameter # "Expanded by the parser"
317
+ when 'TO fpureg' then return parameter # "Fixed in nasm_fixes"
318
+ when 'SHORT imm' then return parameter # "Fixed in nasm_fixes"
319
+ when 'FAR mem' then return parameter # "Fixed in nasm_fixes"
320
+ when 'FAR mem16' then return parameter # "Fixed in nasm_fixes"
321
+ when 'FAR mem32' then return parameter # "Fixed in nasm_fixes"
322
+ when 'NEAR imm' then return parameter # "Fixed in nasm_fixes"
323
+ when 'imm:imm16' then return parameter # "Fixed in nasm_fixes"
324
+ when 'imm:imm32' then return parameter # "Fixed in nasm_fixes"
325
+ when '1' then return Immediate.new(1)
326
+ when 'AL' then return Register.on_id_bits(nil, 0, 8)
327
+ when 'AX' then return Register.on_id_bits(nil, 0, 16)
328
+ when 'EAX' then return Register.on_id_bits(nil, 0, 32)
329
+ when 'CL' then return Register.on_id_bits(nil, 1, 8)
330
+ when 'CX' then return Register.on_id_bits(nil, 1, 16)
331
+ when 'ECX' then return Register.on_id_bits(nil, 1, 32)
332
+ when 'DL' then return Register.on_id_bits(nil, 2, 8)
333
+ when 'DX' then return Register.on_id_bits(nil, 2, 16)
334
+ when 'EDX' then return Register.on_id_bits(nil, 2, 32)
335
+ when 'BL' then return Register.on_id_bits(nil, 3, 8)
336
+ when 'BX' then return Register.on_id_bits(nil, 3, 16)
337
+ when 'EBX' then return Register.on_id_bits(nil, 3, 32)
338
+ when 'ES' then return SegmentRegister.on_id(nil, 0)
339
+ when 'CS' then return SegmentRegister.on_id(nil, 1)
340
+ when 'SS' then return SegmentRegister.on_id(nil, 2)
341
+ when 'DS' then return SegmentRegister.on_id(nil, 3)
342
+ when 'FS' then return SegmentRegister.on_id(nil, 4)
343
+ when 'GS' then return SegmentRegister.on_id(nil, 5)
344
+ when 'imm' then return Immediate.new
345
+ when 'imm8' then return Immediate.new(8)
346
+ when 'imm16' then return Immediate.new(16)
347
+ when 'imm32' then return Immediate.new(32)
348
+ when 'segreg' then return SegmentRegister.new
349
+ when 'reg' then return Register.new
350
+ when 'reg8' then return Register.new(8)
351
+ when 'reg16' then return Register.new(16)
352
+ when 'reg32' then return Register.new(32)
353
+ when 'mem' then return Address.new(false, 4)
354
+ when 'mem8' then return Address.new(false, 8)
355
+ when 'mem16' then return Address.new(false, 16)
356
+ when 'mem32' then return Address.new(false, 32)
357
+ when 'mem64' then return Address.new(false, 64)
358
+ when 'mem80' then return Address.new(false, 80)
359
+ when 'memoffs8' then return Address.new(true, 8)
360
+ when 'memoffs16' then return Address.new(true, 16)
361
+ when 'memoffs32' then return Address.new(true, 32)
362
+ when 'fpureg' then return FPURegister.new
363
+ when /ST(.*)/ then return FPURegister.new($1.to_i)
364
+ when 'mmxreg' then return MMXRegister.new
365
+ when /MM(.*)/ then return MMXRegister.new($1.to_i)
366
+ when 'CR0/2/3/4' then return ControlRegister.new
367
+ when 'DR0/1/2/3/6/7' then return DebugRegister.new
368
+ when 'TR3/4/5/6/7' then return TestRegister.new
369
+ else
370
+ warn "unknown parameter: #{parameter.inspect}"
371
+ return parameter
372
+ end
373
+ end
374
+
375
+ def initialize_parameters params
376
+ self.parameters = params.split(/,/).map { |s| self.to_parameter s }
377
+ end
378
+
379
+ def assemble instruction
380
+ stream = []
381
+
382
+ opcodes.each_with_index do |each, index|
383
+ self.execute_instruction_position_on(each, instruction,
384
+ (index + 1) / opcodes.size, stream)
385
+ end
386
+
387
+ stream
388
+ end
389
+
390
+ def execute_instruction_position_on(byte, instruction, position, stream)
391
+ case byte
392
+ when 'a16', 'a32' then
393
+ raise "not done yet"
394
+ when 'o16' then
395
+ return self.align16_on(instruction, stream)
396
+ when 'o32' then
397
+ return self.align32_on(instruction, stream)
398
+ when 'ib' then
399
+ return stream.push_B(instruction.theImmediate)
400
+ when 'iw' then
401
+ return stream.push_W(instruction.theSecondImmediate) if position == 1
402
+ return stream.push_W(instruction.theImmediate)
403
+ when 'id' then
404
+ return stream.push_D(instruction.theSecondImmediate) if position == 1
405
+ return stream.push_D(instruction.theImmediate)
406
+ when 'rb' then
407
+ return self.relative_b_on(instruction, stream)
408
+ when 'rw' then
409
+ return self.relative_w_on(instruction, stream)
410
+ when 'rw/rd' then
411
+ return self.relative_w_on(instruction, stream) if
412
+ instruction.machine.bits == 16
413
+ return self.relative_d_on(instruction, stream)
414
+ when 'rd' then
415
+ return self.relative_d_on(instruction, stream)
416
+ when 'ow' then
417
+ raise byte
418
+ # [^stream push_W: instruction theAddress offset].
419
+ when 'od' then
420
+ raise byte
421
+ # [^stream push_D: instruction theAddress offset].
422
+ when 'ow/od' then
423
+ if instruction.machine.bits == 16 then
424
+ stream.push_W instruction.theAddress.offset
425
+ end
426
+
427
+ return stream.push_D(instruction.theAddress.offset)
428
+ when /^\/(.*)/ then
429
+ return self.modrm_instruction_on($1, instruction, stream)
430
+ end
431
+
432
+ number = byte.hex
433
+ number += instruction.parameters[parameters.first.id ? 1 : 0].id if
434
+ byte =~ /r$/
435
+ stream << number
436
+ end
437
+
438
+ ##
439
+ # If we get here, there will be at least two parameters to combine
440
+ # a memory address with a register or a register with a register"
441
+
442
+ def modrm_r_on instruction, stream
443
+ address, register = instruction.first, instruction.second
444
+ swap = false # TODO: this can be 1 call at the bottom
445
+
446
+ if instruction.first.register? && instruction.second.register? then
447
+ if parameters.first.memory_register? then
448
+ return instruction.first.push_mod_rm_on(instruction.second, stream)
449
+ else
450
+ return instruction.second.push_mod_rm_on(instruction.first, stream)
451
+ end
452
+ end
453
+
454
+ if instruction.first.special_register? then
455
+ return instruction.second.push_mod_rm_on(instruction.first, stream)
456
+ end
457
+
458
+ if instruction.second.special_register? then
459
+ return instruction.first.push_mod_rm_on(instruction.second, stream)
460
+ end
461
+
462
+ address, register = if instruction.first.register? && instruction.second.respond_to?(:push_mod_rm_on) then
463
+ [instruction.second, instruction.first]
464
+ else
465
+ [instruction.first, instruction.second]
466
+ end
467
+
468
+ address.push_mod_rm_on register, stream
469
+ end
470
+
471
+ def align16_on instruction, stream
472
+ stream << 0x66 if instruction.machine.bits != 16
473
+ end
474
+
475
+ def relative_x_on instruction, stream, msg, dist
476
+ offset = instruction.first
477
+ offset = offset.offset if offset.offset?
478
+
479
+ if offset.label? then
480
+ if offset.future_label? then
481
+ offset.add instruction.machine.stream.size
482
+ return stream.send(msg, dist)
483
+ end
484
+ offset = offset.position
485
+ end
486
+
487
+ stream.send(msg, -(instruction.machine.stream.size - offset + dist))
488
+ end
489
+
490
+ def relative_b_on instruction, stream
491
+ relative_x_on instruction, stream, :push_B, 2
492
+ end
493
+
494
+ def relative_d_on instruction, stream
495
+ relative_x_on instruction, stream, :push_D, 5
496
+ end
497
+
498
+ def relative_w_on instruction, stream
499
+ relative_x_on instruction, stream, :push_W, 3
500
+ end
501
+
502
+ def modrm_n_instruction_on id, instruction, stream
503
+ instruction.first.push_mod_rm_on Register.on_id_bits(instruction.machine, id, instruction.first.bits), stream
504
+ end
505
+
506
+ def align32_on instruction, stream
507
+ stream << 0x67 if instruction.machine.bits != 32
508
+ end
509
+
510
+ def modrm_instruction_on byte, instruction, stream
511
+ if byte == "r" then
512
+ self.modrm_r_on instruction, stream
513
+ else
514
+ self.modrm_n_instruction_on byte.to_i, instruction, stream
515
+ end
516
+ end
517
+ end
518
+
519
+ ##
520
+ # Instruction is an instruction shape that we're going to match to
521
+ # Commands to find out what we should write in to memory.
522
+
523
+ class Instruction
524
+ attr_accessor :opcode, :machine, :parameters
525
+
526
+ def self.on_message machine, message # TODO: remove
527
+ self.new message, machine
528
+ end
529
+
530
+ def initialize message, machine
531
+ self.machine = machine
532
+ self.opcode, *self.parameters = message
533
+ self.opcode = opcode.to_s.upcase
534
+
535
+ self.machine = parameters[1].machine unless machine
536
+
537
+ self.parameters.map! { |each| Proc === each ? each.call.m : each }
538
+
539
+ self.parameters.each do |each|
540
+ each.machine = self.machine if each.operand?
541
+ end
542
+ end
543
+
544
+ def first
545
+ parameters.first
546
+ end
547
+
548
+ def second
549
+ parameters.second
550
+ end
551
+
552
+ def theAddress
553
+ parameters.detect { |e| e.address? }
554
+ end
555
+
556
+ def assemble
557
+ instructions = machine.instructions.select { |command|
558
+ command.instruction_applies? self
559
+ }
560
+
561
+ return false if instructions.empty?
562
+
563
+ bytes = instructions.map { |instruction| instruction.assemble self }
564
+
565
+ sorted_bytes = bytes.sort_by {|byte| [byte.size, (byte[0]||0), (byte[1]||0)]}
566
+
567
+ machine.stream.push(*sorted_bytes.first)
568
+
569
+ true
570
+ end
571
+
572
+ def theSecondImmediate
573
+ parameters.detect { |e| e.immediate_value? }
574
+ end
575
+
576
+ def theImmediate
577
+ parameters.reverse.detect { |e| e.immediate_value? }
578
+ end
579
+ end
580
+
581
+ ##
582
+ # MachineCode is an abstract machine that has subclasses for each
583
+ # concrete machine type that you can write assembly language for.
584
+   # Right now this library only supports X86, so look at
585
+ # MachineCodeX86 for more details on how to use it.
586
+
587
+ class MachineCode
588
+ attr_accessor :stream, :procedure, :bits, :cachedInstructions
589
+ attr_reader :processors
590
+ attr_writer :instructions
591
+
592
+ def initialize
593
+ self.procedure = nil
594
+ self.bits = self.defaultBits
595
+ self.processors = self.defaultProcessors
596
+ self.stream = []
597
+
598
+ self.setupMachine
599
+ end
600
+
601
+ def inspect
602
+ "#{self.class}#{stream.inspect}"
603
+ end
604
+
605
+ def processors= o
606
+ @processors = o
607
+ @cachedInstructions = nil
608
+ end
609
+
610
+ def supportsProcessor instructionProcessors
611
+ processors.any? { |e| instructionProcessors.include? e }
612
+ end
613
+
614
+ def instructions
615
+ self.cachedInstructions ||= @instructions.select { |e|
616
+ self.supportsProcessor e.processors
617
+ }
618
+ end
619
+
620
+ def method_missing msg, *args
621
+ super unless self.instructionFromMessage(msg, *args).assemble
622
+ end
623
+
624
+ def instructionFromMessage msg, *args
625
+ Instruction.on_message self, [msg, *args]
626
+ end
627
+
628
+ def label
629
+ Label.on_at(self, stream.size)
630
+ end
631
+
632
+ def future_label
633
+ FutureLabel.on self
634
+ end
635
+
636
+ def assemble instruction
637
+ raise "no"
638
+ # aBlock on: MessageNotUnderstood do: [:ex |
639
+ # ex originator class = BlockClosure ifFalse: [ex pass].
640
+ # ex resume: (ex originator value m perform: ex parameter selector withArguments: ex parameter arguments)]</body>
641
+ end
642
+
643
+ alias :setupMachine :subclass_responsibility
644
+ alias :platform :subclass_responsibility
645
+ alias :defaultBits :subclass_responsibility
646
+ end
647
+
648
+ ##
649
+ # MachineCodeX86 is a concrete implementation of a machine to create
650
+ # X86 assembly code on.
651
+ #
652
+ # You can use this class in two ways:
653
+ #
654
+ # a) you can instantiate an instance and use its register variables
655
+ # to build up machine code in the @stream variable and then use
656
+ # those bytes in any way that you see fit, or
657
+ #
658
+ # b) you can make a subclass of this class much like you do with
659
+ # ExternalInterface and put methods on the class that will
660
+ # compile in to assembler code that can be called from Smalltalk
661
+ # code
662
+ #
663
+ # == Using MachineCodeX86 for scripting
664
+ #
665
+ # This is the long hand way of writing assembly code, since you
666
+ # always include a receiver with every command.
667
+ #
668
+   #   asm = Wilson::MachineCodeX86.new
669
+ #
670
+ # Once you have an assembler, you can access the registers and send
671
+ # commands to them, eg:
672
+ #
673
+ # asm.eax.mov 1
674
+ #
675
+ # As you send the commands, the @stream will build up containing the
676
+ # X86 assembler bytes you can use. You can use memory addresses in
677
+ # your assembler code with the #m method, eg:
678
+ #
679
+ # asm.eax.m.mov 1
680
+ #
681
+ # Once you are finished, you simply send:
682
+ #
683
+ # asm.stream
684
+ #
685
+ # This will return you the stream of bytes.
686
+ #
687
+ # == Labels & Jumps
688
+ #
689
+ # You can do labels and jump to them using two different label
690
+ # commands. The first is #label, which places a label jump point
691
+ # immediately on call, eg:
692
+ #
693
+ # label = asm.label
694
+ # label.jmp
695
+ #
696
+ # The other is a future label that can be placed at some future
697
+   # point in the program and jumped to
698
+ #
699
+ # label = asm.future_label
700
+ # asm.eax.xor asm.eax
701
+ # label.jmp
702
+ # asm.eax.inc
703
+ # label.plant
704
+ #
705
+ # You #plant the future label where you want it to actually be and
706
+ # past references to it will be updated. Future labels will always
707
+ # use a dword jmp so that there's space to fill in the command if
708
+ # the jmp ends up being far.
709
+
710
+ class MachineCodeX86 < MachineCode
711
+ # registers-general-32bit
712
+ attr_accessor :eax, :ebx, :ebp, :esp, :edi, :esi, :ecx, :edx
713
+
714
+ # registers-fpu
715
+ attr_accessor :st0, :st1, :st2, :st3, :st4, :st5, :st6, :st7
716
+
717
+ # registers-debug
718
+ attr_accessor :dr0, :dr1, :dr2, :dr3, :dr6, :dr7
719
+
720
+ # registers-segment
721
+ attr_accessor :es, :ss, :cs, :gs, :fs, :ds
722
+
723
+ # registers-test
724
+ attr_accessor :tr3, :tr4, :tr5, :tr6, :tr7
725
+
726
+ # registers-general-8bit
727
+ attr_accessor :al, :ah, :bl, :bh, :cl, :ch, :dl, :dh
728
+
729
+ # registers-general-16bit
730
+ attr_accessor :ax, :bx, :cx, :dx, :sp, :bp, :si, :di
731
+
732
+ # registers-control
733
+ attr_accessor :cr0, :cr2, :cr3, :cr4
734
+
735
+ # registers-mmx
736
+ attr_accessor :mm0, :mm1, :mm2, :mm3, :mm4, :mm5, :mm6, :mm7
737
+
738
+ def setupFPURegisters
739
+ self.st0 = FPURegister.on_id self, 0
740
+ self.st1 = FPURegister.on_id self, 1
741
+ self.st2 = FPURegister.on_id self, 2
742
+ self.st3 = FPURegister.on_id self, 3
743
+ self.st4 = FPURegister.on_id self, 4
744
+ self.st5 = FPURegister.on_id self, 5
745
+ self.st6 = FPURegister.on_id self, 6
746
+ self.st7 = FPURegister.on_id self, 7
747
+ end
748
+
749
+ def setupControlRegisters
750
+ self.cr0 = ControlRegister.on_id self, 0
751
+ self.cr2 = ControlRegister.on_id self, 2
752
+ self.cr3 = ControlRegister.on_id self, 3
753
+ self.cr4 = ControlRegister.on_id self, 4
754
+ end
755
+
756
+ def platform
757
+ 'i386'
758
+ end
759
+
760
+ def setupDebugRegisters
761
+ self.dr0 = DebugRegister.on_id self, 0
762
+ self.dr1 = DebugRegister.on_id self, 1
763
+ self.dr2 = DebugRegister.on_id self, 2
764
+ self.dr3 = DebugRegister.on_id self, 3
765
+ self.dr6 = DebugRegister.on_id self, 6
766
+ self.dr7 = DebugRegister.on_id self, 7
767
+ end
768
+
769
+ def defaultBits
770
+ 32
771
+ end
772
+
773
+ def setupSegmentRegisters
774
+ self.es = SegmentRegister.on_id self, 0
775
+ self.cs = SegmentRegister.on_id self, 1
776
+ self.ss = SegmentRegister.on_id self, 2
777
+ self.ds = SegmentRegister.on_id self, 3
778
+ self.fs = SegmentRegister.on_id self, 4
779
+ self.gs = SegmentRegister.on_id self, 5
780
+ end
781
+
782
+ def defaultProcessors
783
+ %w(8086 186 286 386 486 PENT P6 CYRIX FPU MMX PRIV UNDOC)
784
+ end
785
+
786
+ def setupMachine
787
+ self.instructions = Assembler.commands
788
+
789
+ self.setup8BitRegisters
790
+ self.setup16BitRegisters
791
+ self.setup32BitRegisters
792
+ self.setupSegmentRegisters
793
+ self.setupControlRegisters
794
+ self.setupTestRegisters
795
+ self.setupDebugRegisters
796
+ self.setupFPURegisters
797
+ self.setupMMXRegisters
798
+ end
799
+
800
+ def setup8BitRegisters
801
+ self.al = Register.on_id_bits self, 0, 8
802
+ self.cl = Register.on_id_bits self, 1, 8
803
+ self.dl = Register.on_id_bits self, 2, 8
804
+ self.bl = Register.on_id_bits self, 3, 8
805
+ self.ah = Register.on_id_bits self, 4, 8
806
+ self.ch = Register.on_id_bits self, 5, 8
807
+ self.dh = Register.on_id_bits self, 6, 8
808
+ self.bh = Register.on_id_bits self, 7, 8
809
+ end
810
+
811
+ def setup16BitRegisters
812
+ self.ax = Register.on_id_bits self, 0, 16
813
+ self.cx = Register.on_id_bits self, 1, 16
814
+ self.dx = Register.on_id_bits self, 2, 16
815
+ self.bx = Register.on_id_bits self, 3, 16
816
+ self.sp = Register.on_id_bits self, 4, 16
817
+ self.bp = Register.on_id_bits self, 5, 16
818
+ self.si = Register.on_id_bits self, 6, 16
819
+ self.di = Register.on_id_bits self, 7, 16
820
+ end
821
+
822
+ def setupMMXRegisters
823
+ self.mm0 = MMXRegister.on_id self, 0
824
+ self.mm1 = MMXRegister.on_id self, 1
825
+ self.mm2 = MMXRegister.on_id self, 2
826
+ self.mm3 = MMXRegister.on_id self, 3
827
+ self.mm4 = MMXRegister.on_id self, 4
828
+ self.mm5 = MMXRegister.on_id self, 5
829
+ self.mm6 = MMXRegister.on_id self, 6
830
+ self.mm7 = MMXRegister.on_id self, 7
831
+ end
832
+
833
+ def setupTestRegisters
834
+ self.tr3 = TestRegister.on_id self, 3
835
+ self.tr4 = TestRegister.on_id self, 4
836
+ self.tr5 = TestRegister.on_id self, 5
837
+ self.tr6 = TestRegister.on_id self, 6
838
+ self.tr7 = TestRegister.on_id self, 7
839
+ end
840
+
841
+ def setup32BitRegisters
842
+ self.eax = Register.on_id_bits self, 0, 32
843
+ self.ecx = Register.on_id_bits self, 1, 32
844
+ self.edx = Register.on_id_bits self, 2, 32
845
+ self.ebx = Register.on_id_bits self, 3, 32
846
+ self.esp = Register.on_id_bits self, 4, 32
847
+ self.ebp = Register.on_id_bits self, 5, 32
848
+ self.esi = Register.on_id_bits self, 6, 32
849
+ self.edi = Register.on_id_bits self, 7, 32
850
+ end
851
+ end
852
+
853
+ ##
854
+ # Operand is any kind of operand used in a command or instruction,
855
+ # eg: registers, memory addresses, labels, immediates, etc.
856
+
857
+ class Operand
858
+ attr_accessor :machine, :bits
859
+
860
+ def self.on machine
861
+ x = self.new
862
+ x.machine = machine
863
+ x
864
+ end
865
+
866
+ # TODO: fix _all_ initialize methods from here down to have cleaner args
867
+ def initialize bits = nil, machine = nil
868
+ @bits = bits
869
+ @machine = machine
870
+ end
871
+
872
+ def method_missing msg, *args, &b
873
+ super unless self.instructionFromMessage(msg, *args, &b).assemble
874
+ end
875
+
876
+ def instructionFromMessage msg, *args, &b
877
+ Instruction.on_message machine, [msg, self, *args] + (b ? [b] : [])
878
+ end
879
+
880
+ def operand?
881
+ true
882
+ end
883
+ end
884
+
885
+ ##
886
+ # Immediate is an Integer wrapper so that we know the machine we're
887
+ # dealing with when we apply commands
888
+
889
+ class Immediate < Operand
890
+ attr_accessor :value
891
+
892
+ def immediate?
893
+ true
894
+ end
895
+ end
896
+
897
##
# Address is a memory address in one of the following example forms:
#
#   eax, ebx + ecx, eax + 5, 23545, edx + eax + 2312
#
# It is made of an optional base register id (+id+), an optional index
# register (+index+) and an optional numeric +offset+. Encoding is
# done for 32-bit addressing: a ModR/M byte, an optional SIB byte and
# an optional 8 or 32 bit displacement.

class Address < Operand
  MOD_NO_DISP = 0b00000000 # mod=00: no displacement follows
  MOD_DISP8   = 0b01000000 # mod=01: signed 8 bit displacement follows
  MOD_DISP32  = 0b10000000 # mod=10: 32 bit displacement follows

  RM_DISP32   = 0b00000101 # mod=00, r/m=101: bare disp32, no base
  RM_SIB      = 0b00000100 # r/m=100: a SIB byte follows the ModR/M byte
  SIB_NO_INDEX = 0b100     # SIB index=100: no index register

  ESP_ID = 4 # r/m=100 is taken by the SIB escape, so esp needs a SIB
  EBP_ID = 5 # mod=00 with base 101 means "disp32, no base register"

  attr_accessor :id, :index
  attr_reader :offset
  attr_writer :isAssemblerOffset # FIX

  ##
  # Builds an address on +machine+ with base register +id+ and the
  # given +offset+ (see #offset= for how a register offset is treated).

  def self.on_id_offset machine, id, offset
    address = self.new
    address.machine = machine
    address.id = id
    address.offset = offset
    address
  end

  def initialize isAssemblerOffset = nil, bits = nil, id = nil
    super(bits)

    self.isAssemblerOffset = isAssemblerOffset
    self.id = id

    self.index = self.offset = nil
  end

  ##
  # The explicitly assigned size, falling back to the machine's size.

  def bits
    super || self.machine.bits
  end

  ##
  # Assigns the offset. A register is stored as the index register and
  # the numeric offset is reset to 0; anything else is stored as the
  # offset itself.

  def offset= obj
    if obj.register? then
      @offset = 0
      self.index = obj
    else
      @offset = obj
    end
  end

  ##
  # Adds +o+ to this address in place and returns the address: a
  # register becomes the index, anything else replaces the offset.

  def + o # TODO: this seems totally and completely wrong
    if o.register? then
      self.index = o
    else
      self.offset = o
    end
    self
  end

  def address?
    true
  end

  ##
  # The explicit isAssemblerOffset flag when one was assigned,
  # otherwise true exactly when the address has no base register.

  def offset?
    @isAssemblerOffset.nil? ? id.nil? : @isAssemblerOffset
  end

  ##
  # Appends the ModR/M byte, the SIB byte when one is required, and the
  # displacement for this address to +stream+. +spareRegister+ supplies
  # the value of the spare (reg) field of the ModR/M byte.
  #
  # The displacement size is decided exactly once so that the mod field
  # and the bytes that follow always agree:
  #
  # * no displacement for an offset of 0 (unless the base is ebp),
  # * a signed byte for -128..127,
  # * a doubleword for everything else.
  #
  # Returns self when no displacement was emitted, otherwise whatever
  # the stream's push_B / push_D returned.
  #
  # NOTE(review): esp (id 4) cannot be encoded as an index register;
  # that case is not rejected here.

  def push_mod_rm_on spareRegister, stream
    spare = spareRegister.id << 3

    if id.nil? then
      stream << (RM_DISP32 + spare)
      return stream.push_D(offset)
    end

    # [ebp] has no mod=00 form, so it is emitted as [ebp + disp8 0].
    mod = if offset == 0 && id != EBP_ID then
            MOD_NO_DISP
          elsif (-128..127).include? offset then
            MOD_DISP8
          else
            MOD_DISP32
          end

    if index.nil? && id != ESP_ID then
      stream << (mod + spare + id)
    else
      # mod lives in the ModR/M byte; the SIB byte only carries
      # scale (always 1 here, encoded as 0), index and base.
      sib_index = index.nil? ? SIB_NO_INDEX : index.id

      stream << (mod + spare + RM_SIB)
      stream << ((sib_index << 3) + id)
    end

    case mod
    when MOD_NO_DISP then
      self
    when MOD_DISP8 then
      stream.push_B(offset & 0xFF) # two's complement byte
    else
      stream.push_D(offset & 0xFFFFFFFF) # two's complement doubleword
    end
  end

  def m
    self
  end
end
988
+
989
##
# Register is a general X86 register, such as eax, ebx, ecx, edx,
# etc... It is identified by its numeric +id+ and carries its size in
# +bits+.

class Register < Operand
  attr_accessor :id

  ##
  # Builds the register numbered +id+, +bits+ wide, on +machine+.

  def self.on_id_bits machine, id, bits
    new bits, machine, id
  end

  def initialize bits = nil, machine = nil, id = nil
    super bits, machine
    self.id = id
  end

  def register?
    true
  end

  def memory_register?
    false
  end

  ##
  # Sends two +mov+ messages to this register: the first with
  # +address+ as argument, the second with a block returning the
  # register itself.

  def get address # TODO: test
    self.mov address
    self.mov { self }
  end

  ##
  # Appends the ModR/M byte for a direct register reference: mod=11,
  # this register in the low bits and +spareRegister+ in the spare
  # field.

  def push_mod_rm_on spareRegister, stream
    stream << ((spareRegister.id << 3) + id + 0b11000000)
  end

  ##
  # The address based on this register with an offset of 0.

  def m
    self + 0
  end

  ##
  # The address based on this register, displaced by +offset+.

  def + offset
    Address.on_id_offset machine, id, offset
  end

  def - offset
    self + -offset
  end
end
1034
+
1035
##
# MemoryRegister behaves like a regular Register, except that it
# answers true to memory_register? so the parser can tell a secondary
# register from a primary one. It is meant for internal use; use
# Register instead of this guy.

class MemoryRegister < Register
  def memory_register?; true end
end
1045
+
1046
##
# Label is a known +position+ in the machine's byte stream that we can
# jmp/loop back to.

class Label < Operand
  attr_accessor :position

  ##
  # Builds a label on +machine+ located at +position+.

  def self.on_at machine, position
    label = new
    label.position = position
    label.machine  = machine
    label
  end

  ##
  # The size chosen from the distance between the current end of the
  # machine's stream and the label: 8 below 256, 16 below 65536 and 32
  # otherwise.

  def bits
    distance = machine.stream.size - position

    return 8  if distance < 256
    return 16 if distance < 65536

    32
  end

  def label?
    true
  end
end
1075
+
1076
##
# FutureLabel is a label that hasn't been defined yet. It remembers
# the stream positions that refer to it and goes back to fill in the
# appropriate bytes once it is planted.

class FutureLabel < Label
  attr_accessor :positions

  def initialize
    super
    self.positions = []
  end

  ##
  # Remembers +aPosition+ as a spot to patch when the label is planted.

  def add aPosition
    positions << aPosition
  end

  ##
  # True for as long as the label has not been given a position.

  def future_label?
    position.nil?
  end

  ##
  # Fixes the label at the current end of the machine's stream and
  # patches every remembered position. The byte following a remembered
  # position holds a size (2, 3 or 5); it selects a byte, word or
  # doubleword value of <tt>position - origin - size</tt>, which is
  # written over the stream starting at that same byte.

  def plant
    self.position = machine.stream.size

    positions.each do |origin|
      width = machine.stream[origin + 1]

      pusher, length = case width
                       when 2 then [:push_B, 2]
                       when 3 then [:push_W, 3]
                       when 5 then [:push_D, 5]
                       else
                         raise "unhandled size #{width}"
                       end

      bytes = []
      bytes.send pusher, position - origin - length

      bytes.each_with_index do |byte, i|
        machine.stream[origin + i + 1] = byte
      end
    end
  end
end
1120
+
1121
##
# SpecialRegister is the abstract implementation of any kind of
# register that isn't a general register, eg: segment registers, mmx
# registers, fpu registers, etc... It only carries a numeric +id+.

class SpecialRegister < Operand
  attr_accessor :id

  ##
  # Builds the special register numbered +id+ on +machine+.

  def self.on_id machine, id
    special = new
    special.id      = id
    special.machine = machine
    special
  end

  def special_register?
    true
  end
end
1140
+
1141
##
# DebugRegister is an X86 DRx register. It adds nothing of its own to
# SpecialRegister; the class itself is the distinguishing mark.

class DebugRegister < SpecialRegister; end
1146
+
1147
##
# TestRegister is an X86 Test Register, TRx. It adds nothing of its
# own to SpecialRegister; the class itself is the distinguishing mark.

class TestRegister < SpecialRegister; end
1152
+
1153
##
# FPURegister is an X86 fpureg, STx. Unlike the other special
# registers it can be given its +id+ directly at construction time.

class FPURegister < SpecialRegister
  def initialize id = nil
    super() # explicit empty argument list: +id+ is not passed up
    self.id = id
  end
end
1162
+
1163
##
# ControlRegister is an X86 CRx register. It adds nothing of its own
# to SpecialRegister; the class itself is the distinguishing mark.

class ControlRegister < SpecialRegister; end
1168
+
1169
##
# MMXRegister is an X86 MMX register. It can be encoded as a direct
# register reference in a ModR/M byte.

class MMXRegister < SpecialRegister
  ##
  # Appends the ModR/M byte for a direct register reference: mod=11,
  # this register in the low bits and +spareRegister+ in the spare
  # field.

  def push_mod_rm_on spareRegister, stream
    stream << ((spareRegister.id << 3) + id + 0b11000000)
  end
end
1177
+
1178
##
# SegmentRegister is an X86 segment register, eg: ss, cs, ds, es...
# It adds nothing of its own to SpecialRegister; the class itself is
# the distinguishing mark.

class SegmentRegister < SpecialRegister; end
1183
+ end # module Wilson
1184
+
1185
+ __END__
1186
+
1187
+ # The Netwide Assembler: NASM
1188
+
1189
+ # Previous Chapter <nasmdo10.html> | Contents <nasmdoc0.html> | Index
1190
+ # <nasmdoci.html>
1191
+
1192
+
1193
+ # Appendix A: Intel x86 Instruction Reference
1194
+
1195
+ # This appendix provides a complete list of the machine instructions which
1196
+ # NASM will assemble, and a short description of the function of each one.
1197
+
1198
+ # It is not intended to be exhaustive documentation on the fine details of
1199
+ # the instructions' function, such as which exceptions they can trigger:
1200
+ # for such documentation, you should go to Intel's Web site,
1201
+ # |http://www.intel.com/|.
1202
+
1203
+ # Instead, this appendix is intended primarily to provide documentation on
1204
+ # the way the instructions may be used within NASM. For example, looking
1205
+ # up |LOOP| will tell you that NASM allows |CX| or |ECX| to be specified
1206
+ # as an optional second argument to the |LOOP| instruction, to enforce
1207
+ # which of the two possible counter registers should be used if the
1208
+ # default is not the one desired.
1209
+
1210
+ # The instructions are not quite listed in alphabetical order, since
1211
+ # groups of instructions with similar functions are lumped together in the
1212
+ # same entry. Most of them don't move very far from their alphabetic
1213
+ # position because of this.
1214
+
1215
+
1216
+ # A.1 Key to Operand Specifications
1217
+
1218
+ # The instruction descriptions in this appendix specify their operands
1219
+ # using the following notation:
1220
+
1221
+ # * Registers: |reg8| denotes an 8-bit general purpose register,
1222
+ # |reg16| denotes a 16-bit general purpose register, and |reg32| a
1223
+ # 32-bit one. |fpureg| denotes one of the eight FPU stack registers,
1224
+ # |mmxreg| denotes one of the eight 64-bit MMX registers, and
1225
+ # |segreg| denotes a segment register. In addition, some registers
1226
+ # (such as |AL|, |DX| or |ECX|) may be specified explicitly.
1227
+ # * Immediate operands: |imm| denotes a generic immediate operand.
1228
+ # |imm8|, |imm16| and |imm32| are used when the operand is intended
1229
+ # to be a specific size. For some of these instructions, NASM needs
1230
+ # an explicit specifier: for example, |ADD ESP,16| could be
1231
+ # interpreted as either |ADD r/m32,imm32| or |ADD r/m32,imm8|. NASM
1232
+ # chooses the former by default, and so you must specify |ADD
1233
+ # ESP,BYTE 16| for the latter.
1234
+ # * Memory references: |mem| denotes a generic memory reference;
1235
+ # |mem8|, |mem16|, |mem32|, |mem64| and |mem80| are used when the
1236
+ # operand needs to be a specific size. Again, a specifier is needed
1237
+ # in some cases: |DEC [address]| is ambiguous and will be rejected
1238
+ # by NASM. You must specify |DEC BYTE [address]|, |DEC WORD
1239
+ # [address]| or |DEC DWORD [address]| instead.
1240
+ # * Restricted memory references: one form of the |MOV| instruction
1241
+ # allows a memory address to be specified /without/ allowing the
1242
+ # normal range of register combinations and effective address
1243
+ # processing. This is denoted by |memoffs8|, |memoffs16| and
1244
+ # |memoffs32|.
1245
+ # * Register or memory choices: many instructions can accept either a
1246
+ # register /or/ a memory reference as an operand. |r/m8| is a
1247
+ # shorthand for |reg8/mem8|; similarly |r/m16| and |r/m32|. |r/m64|
1248
+ # is MMX-related, and is a shorthand for |mmxreg/mem64|.
1249
+
1250
+
1251
+ # A.2 Key to Opcode Descriptions
1252
+
1253
+ # This appendix also provides the opcodes which NASM will generate for
1254
+ # each form of each instruction. The opcodes are listed in the following way:
1255
+
1256
+ # * A hex number, such as |3F|, indicates a fixed byte containing that
1257
+ # number.
1258
+ # * A hex number followed by |+r|, such as |C8+r|, indicates that one
1259
+ # of the operands to the instruction is a register, and the
1260
+ # `register value' of that register should be added to the hex
1261
+ # number to produce the generated byte. For example, EDX has
1262
+ # register value 2, so the code |C8+r|, when the register operand is
1263
+ # EDX, generates the hex byte |CA|. Register values for specific
1264
+ # registers are given in section A.2.1 <#section-A.2.1>.
1265
+ # * A hex number followed by |+cc|, such as |40+cc|, indicates that
1266
+ # the instruction name has a condition code suffix, and the numeric
1267
+ # representation of the condition code should be added to the hex
1268
+ # number to produce the generated byte. For example, the code
1269
+ # |40+cc|, when the instruction contains the |NE| condition,
1270
+ # generates the hex byte |45|. Condition codes and their numeric
1271
+ # representations are given in section A.2.2 <#section-A.2.2>.
1272
+ # * A slash followed by a digit, such as |/2|, indicates that one of
1273
+ # the operands to the instruction is a memory address or register
1274
+ # (denoted |mem| or |r/m|, with an optional size). This is to be
1275
+ # encoded as an effective address, with a ModR/M byte, an optional
1276
+ # SIB byte, and an optional displacement, and the spare (register)
1277
+ # field of the ModR/M byte should be the digit given (which will be
1278
+ # from 0 to 7, so it fits in three bits). The encoding of effective
1279
+ # addresses is given in section A.2.3 <#section-A.2.3>.
1280
+ # * The code |/r| combines the above two: it indicates that one of the
1281
+ # operands is a memory address or |r/m|, and another is a register,
1282
+ # and that an effective address should be generated with the spare
1283
+ # (register) field in the ModR/M byte being equal to the `register
1284
+ # value' of the register operand. The encoding of effective
1285
+ # addresses is given in section A.2.3 <#section-A.2.3>; register
1286
+ # values are given in section A.2.1 <#section-A.2.1>.
1287
+ # * The codes |ib|, |iw| and |id| indicate that one of the operands to
1288
+ # the instruction is an immediate value, and that this is to be
1289
+ # encoded as a byte, little-endian word or little-endian doubleword
1290
+ # respectively.
1291
+ # * The codes |rb|, |rw| and |rd| indicate that one of the operands to
1292
+ # the instruction is an immediate value, and that the /difference/
1293
+ # between this value and the address of the end of the instruction
1294
+ # is to be encoded as a byte, word or doubleword respectively. Where
1295
+ # the form |rw/rd| appears, it indicates that either |rw| or |rd|
1296
+ # should be used according to whether assembly is being performed in
1297
+ # |BITS 16| or |BITS 32| state respectively.
1298
+ # * The codes |ow| and |od| indicate that one of the operands to the
1299
+ # instruction is a reference to the contents of a memory address
1300
+ # specified as an immediate value: this encoding is used in some
1301
+ # forms of the |MOV| instruction in place of the standard
1302
+ # effective-address mechanism. The displacement is encoded as a word
1303
+ # or doubleword. Again, |ow/od| denotes that |ow| or |od| should be
1304
+ # chosen according to the |BITS| setting.
1305
+ # * The codes |o16| and |o32| indicate that the given form of the
1306
+ # instruction should be assembled with operand size 16 or 32 bits.
1307
+ # In other words, |o16| indicates a |66| prefix in |BITS 32| state,
1308
+ # but generates no code in |BITS 16| state; and |o32| indicates a
1309
+ # |66| prefix in |BITS 16| state but generates nothing in |BITS 32|.
1310
+ # * The codes |a16| and |a32|, similarly to |o16| and |o32|, indicate
1311
+ # the address size of the given form of the instruction. Where this
1312
+ # does not match the |BITS| setting, a |67| prefix is required.
1313
+
1314
+
1315
+ # A.2.1 Register Values
1316
+
1317
+ # Where an instruction requires a register value, it is already implicit
1318
+ # in the encoding of the rest of the instruction what type of register is
1319
+ # intended: an 8-bit general-purpose register, a segment register, a debug
1320
+ # register, an MMX register, or whatever. Therefore there is no problem
1321
+ # with registers of different types sharing an encoding value.
1322
+
1323
+ # The encodings for the various classes of register are:
1324
+
1325
+ # * 8-bit general registers: |AL| is 0, |CL| is 1, |DL| is 2, |BL| is
1326
+ # 3, |AH| is 4, |CH| is 5, |DH| is 6, and |BH| is 7.
1327
+ # * 16-bit general registers: |AX| is 0, |CX| is 1, |DX| is 2, |BX| is
1328
+ # 3, |SP| is 4, |BP| is 5, |SI| is 6, and |DI| is 7.
1329
+ # * 32-bit general registers: |EAX| is 0, |ECX| is 1, |EDX| is 2,
1330
+ # |EBX| is 3, |ESP| is 4, |EBP| is 5, |ESI| is 6, and |EDI| is 7.
1331
+ # * Segment registers: |ES| is 0, |CS| is 1, |SS| is 2, |DS| is 3,
1332
+ # |FS| is 4, and |GS| is 5.
1333
+ # * {Floating-point registers}: |ST0| is 0, |ST1| is 1, |ST2| is 2,
1334
+ # |ST3| is 3, |ST4| is 4, |ST5| is 5, |ST6| is 6, and |ST7| is 7.
1335
+ # * 64-bit MMX registers: |MM0| is 0, |MM1| is 1, |MM2| is 2, |MM3| is
1336
+ # 3, |MM4| is 4, |MM5| is 5, |MM6| is 6, and |MM7| is 7.
1337
+ # * Control registers: |CR0| is 0, |CR2| is 2, |CR3| is 3, and |CR4|
1338
+ # is 4.
1339
+ # * Debug registers: |DR0| is 0, |DR1| is 1, |DR2| is 2, |DR3| is 3,
1340
+ # |DR6| is 6, and |DR7| is 7.
1341
+ # * Test registers: |TR3| is 3, |TR4| is 4, |TR5| is 5, |TR6| is 6,
1342
+ # and |TR7| is 7.
1343
+
1344
+ # (Note that wherever a register name contains a number, that number is
1345
+ # also the register value for that register.)
1346
+
1347
+
1348
+ # A.2.2 Condition Codes
1349
+
1350
+ # The available condition codes are given here, along with their numeric
1351
+ # representations as part of opcodes. Many of these condition codes have
1352
+ # synonyms, so several will be listed at a time.
1353
+
1354
+ # In the following descriptions, the word `either', when applied to two
1355
+ # possible trigger conditions, is used to mean `either or both'. If
1356
+ # `either but not both' is meant, the phrase `exactly one of' is used.
1357
+
1358
+ # * |O| is 0 (trigger if the overflow flag is set); |NO| is 1.
1359
+ # * |B|, |C| and |NAE| are 2 (trigger if the carry flag is set); |AE|,
1360
+ # |NB| and |NC| are 3.
1361
+ # * |E| and |Z| are 4 (trigger if the zero flag is set); |NE| and |NZ|
1362
+ # are 5.
1363
+ # * |BE| and |NA| are 6 (trigger if either of the carry or zero flags
1364
+ # is set); |A| and |NBE| are 7.
1365
+ # * |S| is 8 (trigger if the sign flag is set); |NS| is 9.
1366
+ # * |P| and |PE| are 10 (trigger if the parity flag is set); |NP| and
1367
+ # |PO| are 11.
1368
+ # * |L| and |NGE| are 12 (trigger if exactly one of the sign and
1369
+ # overflow flags is set); |GE| and |NL| are 13.
1370
+ # * |LE| and |NG| are 14 (trigger if either the zero flag is set, or
1371
+ # exactly one of the sign and overflow flags is set); |G| and |NLE|
1372
+ # are 15.
1373
+
1374
+ # Note that in all cases, the sense of a condition code may be reversed by
1375
+ # changing the low bit of the numeric representation.
1376
+
1377
+
1378
+ # A.2.3 Effective Address Encoding: ModR/M and SIB
1379
+
1380
+ # An effective address is encoded in up to three parts: a ModR/M byte, an
1381
+ # optional SIB byte, and an optional byte, word or doubleword displacement
1382
+ # field.
1383
+
1384
+ # The ModR/M byte consists of three fields: the |mod| field, ranging from
1385
+ # 0 to 3, in the upper two bits of the byte, the |r/m| field, ranging from
1386
+ # 0 to 7, in the lower three bits, and the spare (register) field in the
1387
+ # middle (bit 3 to bit 5). The spare field is not relevant to the
1388
+ # effective address being encoded, and either contains an extension to the
1389
+ # instruction opcode or the register value of another operand.
1390
+
1391
+ # The ModR/M system can be used to encode a direct register reference
1392
+ # rather than a memory access. This is always done by setting the |mod|
1393
+ # field to 3 and the |r/m| field to the register value of the register in
1394
+ # question (it must be a general-purpose register, and the size of the
1395
+ # register must already be implicit in the encoding of the rest of the
1396
+ # instruction). In this case, the SIB byte and displacement field are both
1397
+ # absent.
1398
+
1399
+ # In 16-bit addressing mode (either |BITS 16| with no |67| prefix, or
1400
+ # |BITS 32| with a |67| prefix), the SIB byte is never used. The general
1401
+ # rules for |mod| and |r/m| (there is an exception, given below) are:
1402
+
1403
+ # * The |mod| field gives the length of the displacement field: 0
1404
+ # means no displacement, 1 means one byte, and 2 means two bytes.
1405
+ # * The |r/m| field encodes the combination of registers to be added
1406
+ # to the displacement to give the accessed address: 0 means |BX+SI|,
1407
+ # 1 means |BX+DI|, 2 means |BP+SI|, 3 means |BP+DI|, 4 means |SI|
1408
+ # only, 5 means |DI| only, 6 means |BP| only, and 7 means |BX| only.
1409
+
1410
+ # However, there is a special case:
1411
+
1412
+ # * If |mod| is 0 and |r/m| is 6, the effective address encoded is not
1413
+ # |[BP]| as the above rules would suggest, but instead |[disp16]|:
1414
+ # the displacement field is present and is two bytes long, and no
1415
+ # registers are added to the displacement.
1416
+
1417
+ # Therefore the effective address |[BP]| cannot be encoded as efficiently
1418
+ # as |[BX]|; so if you code |[BP]| in a program, NASM adds a notional
1419
+ # 8-bit zero displacement, and sets |mod| to 1, |r/m| to 6, and the
1420
+ # one-byte displacement field to 0.
1421
+
1422
+ # In 32-bit addressing mode (either |BITS 16| with a |67| prefix, or |BITS
1423
+ # 32| with no |67| prefix) the general rules (again, there are exceptions)
1424
+ # for |mod| and |r/m| are:
1425
+
1426
+ # * The |mod| field gives the length of the displacement field: 0
1427
+ # means no displacement, 1 means one byte, and 2 means four bytes.
1428
+ # * If only one register is to be added to the displacement, and it is
1429
+ # not |ESP|, the |r/m| field gives its register value, and the SIB
1430
+ # byte is absent. If the |r/m| field is 4 (which would encode
1431
+ # |ESP|), the SIB byte is present and gives the combination and
1432
+ # scaling of registers to be added to the displacement.
1433
+
1434
+ # If the SIB byte is present, it describes the combination of registers
1435
+ # (an optional base register, and an optional index register scaled by
1436
+ # multiplication by 1, 2, 4 or 8) to be added to the displacement. The SIB
1437
+ # byte is divided into the |scale| field, in the top two bits, the |index|
1438
+ # field in the next three, and the |base| field in the bottom three. The
1439
+ # general rules are:
1440
+
1441
+ # * The |base| field encodes the register value of the base register.
1442
+ # * The |index| field encodes the register value of the index
1443
+ # register, unless it is 4, in which case no index register is used
1444
+ # (so |ESP| cannot be used as an index register).
1445
+ # * The |scale| field encodes the multiplier by which the index
1446
+ # register is scaled before adding it to the base and displacement:
1447
+ # 0 encodes a multiplier of 1, 1 encodes 2, 2 encodes 4 and 3
1448
+ # encodes 8.
1449
+
1450
+ # The exceptions to the 32-bit encoding rules are:
1451
+
1452
+ # * If |mod| is 0 and |r/m| is 5, the effective address encoded is not
1453
+ # |[EBP]| as the above rules would suggest, but instead |[disp32]|:
1454
+ # the displacement field is present and is four bytes long, and no
1455
+ # registers are added to the displacement.
1456
+ # * If |mod| is 0, |r/m| is 4 (meaning the SIB byte is present) and
1457
+ # |base| is 5, the effective address encoded is not |[EBP+index]| as
1458
+ # the above rules would suggest, but instead |[disp32+index]|: the
1459
+ # displacement field is present and is four bytes long, and there is
1460
+ # no base register (but the index register is still processed in the
1461
+ # normal way).
1462
+
1463
+
1464
+ # A.3 Key to Instruction Flags
1465
+
1466
+ # Given along with each instruction in this appendix is a set of flags,
1467
+ # denoting the type of the instruction. The types are as follows:
1468
+
1469
+ # * |8086|, |186|, |286|, |386|, |486|, |PENT| and |P6| denote the
1470
+ # lowest processor type that supports the instruction. Most
1471
+ # instructions run on all processors above the given type; those
1472
+ # that do not are documented. The Pentium II contains no additional
1473
+ # instructions beyond the P6 (Pentium Pro); from the point of view
1474
+ # of its instruction set, it can be thought of as a P6 with MMX
1475
+ # capability.
1476
+ # * |CYRIX| indicates that the instruction is specific to Cyrix
1477
+ # processors, for example the extra MMX instructions in the Cyrix
1478
+ # extended MMX instruction set.
1479
+ # * |FPU| indicates that the instruction is a floating-point one, and
1480
+ # will only run on machines with a coprocessor (automatically
1481
+ # including 486DX, Pentium and above).
1482
+ # * |MMX| indicates that the instruction is an MMX one, and will run
1483
+ # on MMX-capable Pentium processors and the Pentium II.
1484
+ # * |PRIV| indicates that the instruction is a protected-mode
1485
+ # management instruction. Many of these may only be used in
1486
+ # protected mode, or only at privilege level zero.
1487
+ # * |UNDOC| indicates that the instruction is an undocumented one, and
1488
+ # not part of the official Intel Architecture; it may or may not be
1489
+ # supported on any given machine.
1490
+
1491
+
1492
+ # A.4 |AAA|, |AAS|, |AAM|, |AAD|: ASCII Adjustments
1493
+
1494
+ # AAA ; 37 [8086]
1495
+
1496
+ # AAS ; 3F [8086]
1497
+
1498
+ # AAD ; D5 0A [8086]
1499
+ # AAD imm ; D5 ib [8086]
1500
+
1501
+ # AAM ; D4 0A [8086]
1502
+ # AAM imm ; D4 ib [8086]
1503
+
1504
+ # These instructions are used in conjunction with the add, subtract,
1505
+ # multiply and divide instructions to perform binary-coded decimal
1506
+ # arithmetic in /unpacked/ (one BCD digit per byte - easy to translate to
1507
+ # and from ASCII, hence the instruction names) form. There are also packed
1508
+ # BCD instructions |DAA| and |DAS|: see section A.23 <#section-A.23>.
1509
+
1510
+ # |AAA| should be used after a one-byte |ADD| instruction whose
1511
+ # destination was the |AL| register: by means of examining the value in
1512
+ # the low nibble of |AL| and also the auxiliary carry flag |AF|, it
1513
+ # determines whether the addition has overflowed, and adjusts it (and sets
1514
+ # the carry flag) if so. You can add long BCD strings together by doing
1515
+ # |ADD|/|AAA| on the low digits, then doing |ADC|/|AAA| on each subsequent
1516
+ # digit.
1517
+
1518
+ # |AAS| works similarly to |AAA|, but is for use after |SUB| instructions
1519
+ # rather than |ADD|.
1520
+
1521
+ # |AAM| is for use after you have multiplied two decimal digits together
1522
+ # and left the result in |AL|: it divides |AL| by ten and stores the
1523
+ # quotient in |AH|, leaving the remainder in |AL|. The divisor 10 can be
1524
+ # changed by specifying an operand to the instruction: a particularly
1525
+ # handy use of this is |AAM 16|, causing the two nibbles in |AL| to be
1526
+ # separated into |AH| and |AL|.
1527
+
1528
+ # |AAD| performs the inverse operation to |AAM|: it multiplies |AH| by
1529
+ # ten, adds it to |AL|, and sets |AH| to zero. Again, the multiplier 10
1530
+ # can be changed.
1531
+
1532
+
1533
+ # A.5 |ADC|: Add with Carry
1534
+
1535
+ # ADC r/m8,reg8 ; 10 /r [8086]
1536
+ # ADC r/m16,reg16 ; o16 11 /r [8086]
1537
+ # ADC r/m32,reg32 ; o32 11 /r [386]
1538
+
1539
+ # ADC reg8,r/m8 ; 12 /r [8086]
1540
+ # ADC reg16,r/m16 ; o16 13 /r [8086]
1541
+ # ADC reg32,r/m32 ; o32 13 /r [386]
1542
+
1543
+ # ADC r/m8,imm8 ; 80 /2 ib [8086]
1544
+ # ADC r/m16,imm16 ; o16 81 /2 iw [8086]
1545
+ # ADC r/m32,imm32 ; o32 81 /2 id [386]
1546
+
1547
+ # ADC r/m16,imm8 ; o16 83 /2 ib [8086]
1548
+ # ADC r/m32,imm8 ; o32 83 /2 ib [386]
1549
+
1550
+ # ADC AL,imm8 ; 14 ib [8086]
1551
+ # ADC AX,imm16 ; o16 15 iw [8086]
1552
+ # ADC EAX,imm32 ; o32 15 id [386]
1553
+
1554
+ # |ADC| performs integer addition: it adds its two operands together, plus
1555
+ # the value of the carry flag, and leaves the result in its destination
1556
+ # (first) operand. The flags are set according to the result of the
1557
+ # operation: in particular, the carry flag is affected and can be used by
1558
+ # a subsequent |ADC| instruction.
1559
+
1560
+ # In the forms with an 8-bit immediate second operand and a longer first
1561
+ # operand, the second operand is considered to be signed, and is
1562
+ # sign-extended to the length of the first operand. In these cases, the
1563
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
1564
+ # instruction.
1565
+
1566
+ # To add two numbers without also adding the contents of the carry flag,
1567
+ # use |ADD| (section A.6 <#section-A.6>).
1568
+
1569
+
1570
+ # A.6 |ADD|: Add Integers
1571
+
1572
+ # ADD r/m8,reg8 ; 00 /r [8086]
1573
+ # ADD r/m16,reg16 ; o16 01 /r [8086]
1574
+ # ADD r/m32,reg32 ; o32 01 /r [386]
1575
+
1576
+ # ADD reg8,r/m8 ; 02 /r [8086]
1577
+ # ADD reg16,r/m16 ; o16 03 /r [8086]
1578
+ # ADD reg32,r/m32 ; o32 03 /r [386]
1579
+
1580
+ # ADD r/m8,imm8 ; 80 /0 ib [8086]
1581
+ # ADD r/m16,imm16 ; o16 81 /0 iw [8086]
1582
+ # ADD r/m32,imm32 ; o32 81 /0 id [386]
1583
+
1584
+ # ADD r/m16,imm8 ; o16 83 /0 ib [8086]
1585
+ # ADD r/m32,imm8 ; o32 83 /0 ib [386]
1586
+
1587
+ # ADD AL,imm8 ; 04 ib [8086]
1588
+ # ADD AX,imm16 ; o16 05 iw [8086]
1589
+ # ADD EAX,imm32 ; o32 05 id [386]
1590
+
1591
+ # |ADD| performs integer addition: it adds its two operands together, and
1592
+ # leaves the result in its destination (first) operand. The flags are set
1593
+ # according to the result of the operation: in particular, the carry flag
1594
+ # is affected and can be used by a subsequent |ADC| instruction (section
1595
+ # A.5 <#section-A.5>).
1596
+
1597
+ # In the forms with an 8-bit immediate second operand and a longer first
1598
+ # operand, the second operand is considered to be signed, and is
1599
+ # sign-extended to the length of the first operand. In these cases, the
1600
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
1601
+ # instruction.
1602
+
1603
+
1604
+ # A.7 |AND|: Bitwise AND
1605
+
1606
+ # AND r/m8,reg8 ; 20 /r [8086]
1607
+ # AND r/m16,reg16 ; o16 21 /r [8086]
1608
+ # AND r/m32,reg32 ; o32 21 /r [386]
1609
+
1610
+ # AND reg8,r/m8 ; 22 /r [8086]
1611
+ # AND reg16,r/m16 ; o16 23 /r [8086]
1612
+ # AND reg32,r/m32 ; o32 23 /r [386]
1613
+
1614
+ # AND r/m8,imm8 ; 80 /4 ib [8086]
1615
+ # AND r/m16,imm16 ; o16 81 /4 iw [8086]
1616
+ # AND r/m32,imm32 ; o32 81 /4 id [386]
1617
+
1618
+ # AND r/m16,imm8 ; o16 83 /4 ib [8086]
1619
+ # AND r/m32,imm8 ; o32 83 /4 ib [386]
1620
+
1621
+ # AND AL,imm8 ; 24 ib [8086]
1622
+ # AND AX,imm16 ; o16 25 iw [8086]
1623
+ # AND EAX,imm32 ; o32 25 id [386]
1624
+
1625
+ # |AND| performs a bitwise AND operation between its two operands (i.e.
1626
+ # each bit of the result is 1 if and only if the corresponding bits of the
1627
+ # two inputs were both 1), and stores the result in the destination
1628
+ # (first) operand.
1629
+
1630
+ # In the forms with an 8-bit immediate second operand and a longer first
1631
+ # operand, the second operand is considered to be signed, and is
1632
+ # sign-extended to the length of the first operand. In these cases, the
1633
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
1634
+ # instruction.
1635
+
1636
+ # The MMX instruction |PAND| (see section A.116 <#section-A.116>) performs
1637
+ # the same operation on the 64-bit MMX registers.
1638
+
1639
+
1640
+ # A.8 |ARPL|: Adjust RPL Field of Selector
1641
+
1642
+ # ARPL r/m16,reg16 ; 63 /r [286,PRIV]
1643
+
1644
+ # |ARPL| expects its two word operands to be segment selectors. It adjusts
1645
+ # the RPL (requested privilege level - stored in the bottom two bits of
1646
+ # the selector) field of the destination (first) operand to ensure that it
1647
+ # is no less (i.e. no more privileged than) the RPL field of the source
1648
+ # operand. The zero flag is set if and only if a change had to be made.
1649
+
1650
+
1651
+ # A.9 |BOUND|: Check Array Index against Bounds
1652
+
1653
+ # BOUND reg16,mem ; o16 62 /r [186]
1654
+ # BOUND reg32,mem ; o32 62 /r [386]
1655
+
1656
+ # |BOUND| expects its second operand to point to an area of memory
1657
+ # containing two signed values of the same size as its first operand (i.e.
1658
+ # two words for the 16-bit form; two doublewords for the 32-bit form). It
1659
+ # performs two signed comparisons: if the value in the register passed as
1660
+ # its first operand is less than the first of the in-memory values, or is
1661
+ # greater than or equal to the second, it throws a BR exception.
1662
+ # Otherwise, it does nothing.
1663
+
1664
+
1665
+ # A.10 |BSF|, |BSR|: Bit Scan
1666
+
1667
+ # BSF reg16,r/m16 ; o16 0F BC /r [386]
1668
+ # BSF reg32,r/m32 ; o32 0F BC /r [386]
1669
+
1670
+ # BSR reg16,r/m16 ; o16 0F BD /r [386]
1671
+ # BSR reg32,r/m32 ; o32 0F BD /r [386]
1672
+
1673
+ # |BSF| searches for a set bit in its source (second) operand, starting
1674
+ # from the bottom, and if it finds one, stores the index in its
1675
+ # destination (first) operand. If no set bit is found, the contents of the
1676
+ # destination operand are undefined.
1677
+
1678
+ # |BSR| performs the same function, but searches from the top instead, so
1679
+ # it finds the most significant set bit.
1680
+
1681
+ # Bit indices are from 0 (least significant) to 15 or 31 (most significant).
1682
+
1683
+
1684
+ # A.11 |BSWAP|: Byte Swap
1685
+
1686
+ # BSWAP reg32 ; o32 0F C8+r [486]
1687
+
1688
+ # |BSWAP| swaps the order of the four bytes of a 32-bit register: bits 0-7
1689
+ # exchange places with bits 24-31, and bits 8-15 swap with bits 16-23.
1690
+ # There is no explicit 16-bit equivalent: to byte-swap |AX|, |BX|, |CX| or
1691
+ # |DX|, |XCHG| can be used.
1692
+
1693
+
1694
+ # A.12 |BT|, |BTC|, |BTR|, |BTS|: Bit Test
1695
+
1696
+ # BT r/m16,reg16 ; o16 0F A3 /r [386]
1697
+ # BT r/m32,reg32 ; o32 0F A3 /r [386]
1698
+ # BT r/m16,imm8 ; o16 0F BA /4 ib [386]
1699
+ # BT r/m32,imm8 ; o32 0F BA /4 ib [386]
1700
+
1701
+ # BTC r/m16,reg16 ; o16 0F BB /r [386]
1702
+ # BTC r/m32,reg32 ; o32 0F BB /r [386]
1703
+ # BTC r/m16,imm8 ; o16 0F BA /7 ib [386]
1704
+ # BTC r/m32,imm8 ; o32 0F BA /7 ib [386]
1705
+
1706
+ # BTR r/m16,reg16 ; o16 0F B3 /r [386]
1707
+ # BTR r/m32,reg32 ; o32 0F B3 /r [386]
1708
+ # BTR r/m16,imm8 ; o16 0F BA /6 ib [386]
1709
+ # BTR r/m32,imm8 ; o32 0F BA /6 ib [386]
1710
+
1711
+ # BTS r/m16,reg16 ; o16 0F AB /r [386]
1712
+ # BTS r/m32,reg32 ; o32 0F AB /r [386]
1713
+ # BTS r/m16,imm8 ; o16 0F BA /5 ib [386]
1714
+ # BTS r/m32,imm8 ; o32 0F BA /5 ib [386]
1715
+
1716
+ # These instructions all test one bit of their first operand, whose index
1717
+ # is given by the second operand, and store the value of that bit into the
1718
+ # carry flag. Bit indices are from 0 (least significant) to 15 or 31 (most
1719
+ # significant).
1720
+
1721
+ # In addition to storing the original value of the bit into the carry
1722
+ # flag, |BTR| also resets (clears) the bit in the operand itself. |BTS|
1723
+ # sets the bit, and |BTC| complements the bit. |BT| does not modify its
1724
+ # operands.
1725
+
1726
+ # The bit offset should be less than the size of the operand in bits.
1727
+
1728
+
1729
+ # A.13 |CALL|: Call Subroutine
1730
+
1731
+ # CALL imm ; E8 rw/rd [8086]
1732
+ # CALL imm:imm16 ; o16 9A iw iw [8086]
1733
+ # CALL imm:imm32 ; o32 9A id iw [386]
1734
+ # CALL FAR mem16 ; o16 FF /3 [8086]
1735
+ # CALL FAR mem32 ; o32 FF /3 [386]
1736
+ # CALL r/m16 ; o16 FF /2 [8086]
1737
+ # CALL r/m32 ; o32 FF /2 [386]
1738
+
1739
+ # |CALL| calls a subroutine, by means of pushing the current instruction
1740
+ # pointer (|IP|) and optionally |CS| as well on the stack, and then
1741
+ # jumping to a given address.
1742
+
1743
+ # |CS| is pushed as well as |IP| if and only if the call is a far call,
1744
+ # i.e. a destination segment address is specified in the instruction. The
1745
+ # forms involving two colon-separated arguments are far calls; so are the
1746
+ # |CALL FAR mem| forms.
1747
+
1748
+ # You can choose between the two immediate far call forms (|CALL imm:imm|)
1749
+ # by the use of the |WORD| and |DWORD| keywords: |CALL WORD
1750
+ # 0x1234:0x5678| or |CALL DWORD 0x1234:0x56789abc|.
1751
+
1752
+ # The |CALL FAR mem| forms execute a far call by loading the destination
1753
+ # address out of memory. The address loaded consists of 16 or 32 bits of
1754
+ # offset (depending on the operand size), and 16 bits of segment. The
1755
+ # operand size may be overridden using |CALL WORD FAR mem| or |CALL DWORD
1756
+ # FAR mem|.
1757
+
1758
+ # The |CALL r/m| forms execute a near call (within the same segment),
1759
+ # loading the destination address out of memory or out of a register. The
1760
+ # keyword |NEAR| may be specified, for clarity, in these forms, but is not
1761
+ # necessary. Again, operand size can be overridden using |CALL WORD mem|
1762
+ # or |CALL DWORD mem|.
1763
+
1764
+ # As a convenience, NASM does not require you to call a far procedure
1765
+ # symbol by coding the cumbersome |CALL SEG routine:routine|, but instead
1766
+ # allows the easier synonym |CALL FAR routine|.
1767
+
1768
+ # The |CALL r/m| forms given above are near calls; NASM will accept the
1769
+ # |NEAR| keyword (e.g. |CALL NEAR [address]|), even though it is not
1770
+ # strictly necessary.
1771
+
1772
+
1773
+ # A.14 |CBW|, |CWD|, |CDQ|, |CWDE|: Sign Extensions
1774
+
1775
+ # CBW ; o16 98 [8086]
1776
+ # CWD ; o16 99 [8086]
1777
+ # CDQ ; o32 99 [386]
1778
+ # CWDE ; o32 98 [386]
1779
+
1780
+ # All these instructions sign-extend a short value into a longer one, by
1781
+ # replicating the top bit of the original value to fill the extended one.
1782
+
1783
+ # |CBW| extends |AL| into |AX| by repeating the top bit of |AL| in every
1784
+ # bit of |AH|. |CWD| extends |AX| into |DX:AX| by repeating the top bit of
1785
+ # |AX| throughout |DX|. |CWDE| extends |AX| into |EAX|, and |CDQ| extends
1786
+ # |EAX| into |EDX:EAX|.
1787
+
1788
+
1789
+ # A.15 |CLC|, |CLD|, |CLI|, |CLTS|: Clear Flags
1790
+
1791
+ # CLC ; F8 [8086]
1792
+ # CLD ; FC [8086]
1793
+ # CLI ; FA [8086]
1794
+ # CLTS ; 0F 06 [286,PRIV]
1795
+
1796
+ # These instructions clear various flags. |CLC| clears the carry flag;
1797
+ # |CLD| clears the direction flag; |CLI| clears the interrupt flag (thus
1798
+ # disabling interrupts); and |CLTS| clears the task-switched (|TS|) flag
1799
+ # in |CR0|.
1800
+
1801
+ # To set the carry, direction, or interrupt flags, use the |STC|, |STD|
1802
+ # and |STI| instructions (section A.156 <#section-A.156>). To invert the
1803
+ # carry flag, use |CMC| (section A.16 <#section-A.16>).
1804
+
1805
+
1806
+ # A.16 |CMC|: Complement Carry Flag
1807
+
1808
+ # CMC ; F5 [8086]
1809
+
1810
+ # |CMC| changes the value of the carry flag: if it was 0, it sets it to 1,
1811
+ # and vice versa.
1812
+
1813
+
1814
+ # A.17 |CMOVcc|: Conditional Move
1815
+
1816
+ # CMOVcc reg16,r/m16 ; o16 0F 40+cc /r [P6]
1817
+ # CMOVcc reg32,r/m32 ; o32 0F 40+cc /r [P6]
1818
+
1819
+ # |CMOV| moves its source (second) operand into its destination (first)
1820
+ # operand if the given condition code is satisfied; otherwise it does
1821
+ # nothing.
1822
+
1823
+ # For a list of condition codes, see section A.2.2 <#section-A.2.2>.
1824
+
1825
+ # Although the |CMOV| instructions are flagged |P6| above, they may not be
1826
+ # supported by all Pentium Pro processors; the |CPUID| instruction
1827
+ # (section A.22 <#section-A.22>) will return a bit which indicates whether
1828
+ # conditional moves are supported.
1829
+
1830
+
1831
+ # A.18 |CMP|: Compare Integers
1832
+
1833
+ # CMP r/m8,reg8 ; 38 /r [8086]
1834
+ # CMP r/m16,reg16 ; o16 39 /r [8086]
1835
+ # CMP r/m32,reg32 ; o32 39 /r [386]
1836
+
1837
+ # CMP reg8,r/m8 ; 3A /r [8086]
1838
+ # CMP reg16,r/m16 ; o16 3B /r [8086]
1839
+ # CMP reg32,r/m32 ; o32 3B /r [386]
1840
+
1841
+ # CMP r/m8,imm8 ; 80 /7 ib [8086]
1842
+ # CMP r/m16,imm16 ; o16 81 /7 iw [8086]
1843
+ # CMP r/m32,imm32 ; o32 81 /7 id [386]
1844
+
1845
+ # CMP r/m16,imm8 ; o16 83 /7 ib [8086]
1846
+ # CMP r/m32,imm8 ; o32 83 /7 ib [386]
1847
+
1848
+ # CMP AL,imm8 ; 3C ib [8086]
1849
+ # CMP AX,imm16 ; o16 3D iw [8086]
1850
+ # CMP EAX,imm32 ; o32 3D id [386]
1851
+
1852
+ # |CMP| performs a `mental' subtraction of its second operand from its
1853
+ # first operand, and affects the flags as if the subtraction had taken
1854
+ # place, but does not store the result of the subtraction anywhere.
1855
+
1856
+ # In the forms with an 8-bit immediate second operand and a longer first
1857
+ # operand, the second operand is considered to be signed, and is
1858
+ # sign-extended to the length of the first operand. In these cases, the
1859
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
1860
+ # instruction.
1861
+
1862
+
1863
+ # A.19 |CMPSB|, |CMPSW|, |CMPSD|: Compare Strings
1864
+
1865
+ # CMPSB ; A6 [8086]
1866
+ # CMPSW ; o16 A7 [8086]
1867
+ # CMPSD ; o32 A7 [386]
1868
+
1869
+ # |CMPSB| compares the byte at |[DS:SI]| or |[DS:ESI]| with the byte at
1870
+ # |[ES:DI]| or |[ES:EDI]|, and sets the flags accordingly. It then
1871
+ # increments or decrements (depending on the direction flag: increments if
1872
+ # the flag is clear, decrements if it is set) |SI| and |DI| (or |ESI| and
1873
+ # |EDI|).
1874
+
1875
+ # The registers used are |SI| and |DI| if the address size is 16 bits, and
1876
+ # |ESI| and |EDI| if it is 32 bits. If you need to use an address size not
1877
+ # equal to the current |BITS| setting, you can use an explicit |a16| or
1878
+ # |a32| prefix.
1879
+
1880
+ # The segment register used to load from |[SI]| or |[ESI]| can be
1881
+ # overridden by using a segment register name as a prefix (for example,
1882
+ # |es cmpsb|). The use of |ES| for the load from |[DI]| or |[EDI]| cannot
1883
+ # be overridden.
1884
+
1885
+ # |CMPSW| and |CMPSD| work in the same way, but they compare a word or a
1886
+ # doubleword instead of a byte, and increment or decrement the addressing
1887
+ # registers by 2 or 4 instead of 1.
1888
+
1889
+ # The |REPE| and |REPNE| prefixes (equivalently, |REPZ| and |REPNZ|) may
1890
+ # be used to repeat the instruction up to |CX| (or |ECX| - again, the
1891
+ # address size chooses which) times until the first unequal or equal byte
1892
+ # is found.
1893
+
1894
+
1895
+ # A.20 |CMPXCHG|, |CMPXCHG486|: Compare and Exchange
1896
+
1897
+ # CMPXCHG r/m8,reg8 ; 0F B0 /r [PENT]
1898
+ # CMPXCHG r/m16,reg16 ; o16 0F B1 /r [PENT]
1899
+ # CMPXCHG r/m32,reg32 ; o32 0F B1 /r [PENT]
1900
+
1901
+ # CMPXCHG486 r/m8,reg8 ; 0F A6 /r [486,UNDOC]
1902
+ # CMPXCHG486 r/m16,reg16 ; o16 0F A7 /r [486,UNDOC]
1903
+ # CMPXCHG486 r/m32,reg32 ; o32 0F A7 /r [486,UNDOC]
1904
+
1905
+ # These two instructions perform exactly the same operation; however,
1906
+ # apparently some (not all) 486 processors support it under a non-standard
1907
+ # opcode, so NASM provides the undocumented |CMPXCHG486| form to generate
1908
+ # the non-standard opcode.
1909
+
1910
+ # |CMPXCHG| compares its destination (first) operand to the value in |AL|,
1911
+ # |AX| or |EAX| (depending on the size of the instruction). If they are
1912
+ # equal, it copies its source (second) operand into the destination and
1913
+ # sets the zero flag. Otherwise, it clears the zero flag and leaves the
1914
+ # destination alone.
1915
+
1916
+ # |CMPXCHG| is intended to be used for atomic operations in multitasking
1917
+ # or multiprocessor environments. To safely update a value in shared
1918
+ # memory, for example, you might load the value into |EAX|, load the
1919
+ # updated value into |EBX|, and then execute the instruction |lock cmpxchg
1920
+ # [value],ebx|. If |value| has not changed since being loaded, it is
1921
+ # updated with your desired new value, and the zero flag is set to let you
1922
+ # know it has worked. (The |LOCK| prefix prevents another processor doing
1923
+ # anything in the middle of this operation: it guarantees atomicity.)
1924
+ # However, if another processor has modified the value in between your
1925
+ # load and your attempted store, the store does not happen, and you are
1926
+ # notified of the failure by a cleared zero flag, so you can go round and
1927
+ # try again.
1928
+
1929
+
1930
+ # A.21 |CMPXCHG8B|: Compare and Exchange Eight Bytes
1931
+
1932
+ # CMPXCHG8B mem ; 0F C7 /1 [PENT]
1933
+
1934
+ # This is a larger and more unwieldy version of |CMPXCHG|: it compares the
1935
+ # 64-bit (eight-byte) value stored at |[mem]| with the value in |EDX:EAX|.
1936
+ # If they are equal, it sets the zero flag and stores |ECX:EBX| into the
1937
+ # memory area. If they are unequal, it clears the zero flag and leaves the
1938
+ # memory area untouched.
1939
+
1940
+
1941
+ # A.22 |CPUID|: Get CPU Identification Code
1942
+
1943
+ # CPUID ; 0F A2 [PENT]
1944
+
1945
+ # |CPUID| returns various information about the processor it is being
1946
+ # executed on. It fills the four registers |EAX|, |EBX|, |ECX| and |EDX|
1947
+ # with information, which varies depending on the input contents of |EAX|.
1948
+
1949
+ # |CPUID| also acts as a barrier to serialise instruction execution:
1950
+ # executing the |CPUID| instruction guarantees that all the effects
1951
+ # (memory modification, flag modification, register modification) of
1952
+ # previous instructions have been completed before the next instruction
1953
+ # gets fetched.
1954
+
1955
+ # The information returned is as follows:
1956
+
1957
+ # * If |EAX| is zero on input, |EAX| on output holds the maximum
1958
+ # acceptable input value of |EAX|, and |EBX:EDX:ECX| contain the
1959
+ # string |"GenuineIntel"| (or not, if you have a clone processor).
1960
+ # That is to say, |EBX| contains |"Genu"| (in NASM's own sense of
1961
+ # character constants, described in section 3.4.2
1962
+ # <nasmdoc3.html#section-3.4.2>), |EDX| contains |"ineI"| and |ECX|
1963
+ # contains |"ntel"|.
1964
+ # * If |EAX| is one on input, |EAX| on output contains version
1965
+ # information about the processor, and |EDX| contains a set of
1966
+ # feature flags, showing the presence and absence of various
1967
+ # features. For example, bit 8 is set if the |CMPXCHG8B| instruction
1968
+ # (section A.21 <#section-A.21>) is supported, bit 15 is set if the
1969
+ # conditional move instructions (section A.17 <#section-A.17> and
1970
+ # section A.34 <#section-A.34>) are supported, and bit 23 is set if
1971
+ # MMX instructions are supported.
1972
+ # * If |EAX| is two on input, |EAX|, |EBX|, |ECX| and |EDX| all
1973
+ # contain information about caches and TLBs (Translation Lookaside
1974
+ # Buffers).
1975
+
1976
+ # For more information on the data returned from |CPUID|, see the
1977
+ # documentation on Intel's web site.
1978
+
1979
+
1980
+ # A.23 |DAA|, |DAS|: Decimal Adjustments
1981
+
1982
+ # DAA ; 27 [8086]
1983
+ # DAS ; 2F [8086]
1984
+
1985
+ # These instructions are used in conjunction with the add and subtract
1986
+ # instructions to perform binary-coded decimal arithmetic in /packed/ (one
1987
+ # BCD digit per nibble) form. For the unpacked equivalents, see section
1988
+ # A.4 <#section-A.4>.
1989
+
1990
+ # |DAA| should be used after a one-byte |ADD| instruction whose
1991
+ # destination was the |AL| register: by means of examining the value in
1992
+ # the |AL| and also the auxiliary carry flag |AF|, it determines whether
1993
+ # either digit of the addition has overflowed, and adjusts it (and sets
1994
+ # the carry and auxiliary-carry flags) if so. You can add long BCD strings
1995
+ # together by doing |ADD|/|DAA| on the low two digits, then doing
1996
+ # |ADC|/|DAA| on each subsequent pair of digits.
1997
+
1998
+ # |DAS| works similarly to |DAA|, but is for use after |SUB| instructions
1999
+ # rather than |ADD|.
2000
+
2001
+
2002
+ # A.24 |DEC|: Decrement Integer
2003
+
2004
+ # DEC reg16 ; o16 48+r [8086]
2005
+ # DEC reg32 ; o32 48+r [386]
2006
+ # DEC r/m8 ; FE /1 [8086]
2007
+ # DEC r/m16 ; o16 FF /1 [8086]
2008
+ # DEC r/m32 ; o32 FF /1 [386]
2009
+
2010
+ # |DEC| subtracts 1 from its operand. It does /not/ affect the carry flag:
2011
+ # to affect the carry flag, use |SUB something,1| (see section A.159
2012
+ # <#section-A.159>). See also |INC| (section A.79 <#section-A.79>).
2013
+
2014
+
2015
+ # A.25 |DIV|: Unsigned Integer Divide
2016
+
2017
+ # DIV r/m8 ; F6 /6 [8086]
2018
+ # DIV r/m16 ; o16 F7 /6 [8086]
2019
+ # DIV r/m32 ; o32 F7 /6 [386]
2020
+
2021
+ # |DIV| performs unsigned integer division. The explicit operand provided
2022
+ # is the divisor; the dividend and destination operands are implicit, in
2023
+ # the following way:
2024
+
2025
+ # * For |DIV r/m8|, |AX| is divided by the given operand; the quotient
2026
+ # is stored in |AL| and the remainder in |AH|.
2027
+ # * For |DIV r/m16|, |DX:AX| is divided by the given operand; the
2028
+ # quotient is stored in |AX| and the remainder in |DX|.
2029
+ # * For |DIV r/m32|, |EDX:EAX| is divided by the given operand; the
2030
+ # quotient is stored in |EAX| and the remainder in |EDX|.
2031
+
2032
+ # Signed integer division is performed by the |IDIV| instruction: see
2033
+ # section A.76 <#section-A.76>.
2034
+
2035
+
2036
+ # A.26 |EMMS|: Empty MMX State
2037
+
2038
+ # EMMS ; 0F 77 [PENT,MMX]
2039
+
2040
+ # |EMMS| sets the FPU tag word (marking which floating-point registers are
2041
+ # available) to all ones, meaning all registers are available for the FPU
2042
+ # to use. It should be used after executing MMX instructions and before
2043
+ # executing any subsequent floating-point operations.
2044
+
2045
+
2046
+ # A.27 |ENTER|: Create Stack Frame
2047
+
2048
+ # ENTER imm,imm ; C8 iw ib [186]
2049
+
2050
+ # |ENTER| constructs a stack frame for a high-level language procedure
2051
+ # call. The first operand (the |iw| in the opcode definition above refers
2052
+ # to the first operand) gives the amount of stack space to allocate for
2053
+ # local variables; the second (the |ib| above) gives the nesting level of
2054
+ # the procedure (for languages like Pascal, with nested procedures).
2055
+
2056
+ # The function of |ENTER|, with a nesting level of zero, is equivalent to
2057
+
2058
+ # PUSH EBP ; or PUSH BP in 16 bits
2059
+ # MOV EBP,ESP ; or MOV BP,SP in 16 bits
2060
+ # SUB ESP,operand1 ; or SUB SP,operand1 in 16 bits
2061
+
2062
+ # This creates a stack frame with the procedure parameters accessible
2063
+ # upwards from |EBP|, and local variables accessible downwards from |EBP|.
2064
+
2065
+ # With a nesting level of one, the stack frame created is 4 (or 2) bytes
2066
+ # bigger, and the value of the final frame pointer |EBP| is accessible in
2067
+ # memory at |[EBP-4]|.
2068
+
2069
+ # This allows |ENTER|, when called with a nesting level of two, to look at
2070
+ # the stack frame described by the /previous/ value of |EBP|, find the
2071
+ # frame pointer at offset -4 from that, and push it along with its new
2072
+ # frame pointer, so that when a level-two procedure is called from within
2073
+ # a level-one procedure, |[EBP-4]| holds the frame pointer of the most
2074
+ # recent level-one procedure call and |[EBP-8]| holds that of the most
2075
+ # recent level-two call. And so on, for nesting levels up to 31.
2076
+
2077
+ # Stack frames created by |ENTER| can be destroyed by the |LEAVE|
2078
+ # instruction: see section A.94 <#section-A.94>.
2079
+
2080
+
2081
+ # A.28 |F2XM1|: Calculate 2**X-1
2082
+
2083
+ # F2XM1 ; D9 F0 [8086,FPU]
2084
+
2085
+ # |F2XM1| raises 2 to the power of |ST0|, subtracts one, and stores the
2086
+ # result back into |ST0|. The initial contents of |ST0| must be a number
2087
+ # in the range -1 to +1.
2088
+
2089
+
2090
+ # A.29 |FABS|: Floating-Point Absolute Value
2091
+
2092
+ # FABS ; D9 E1 [8086,FPU]
2093
+
2094
+ # |FABS| computes the absolute value of |ST0|, storing the result back in
2095
+ # |ST0|.
2096
+
2097
+
2098
+ # A.30 |FADD|, |FADDP|: Floating-Point Addition
2099
+
2100
+ # FADD mem32 ; D8 /0 [8086,FPU]
2101
+ # FADD mem64 ; DC /0 [8086,FPU]
2102
+
2103
+ # FADD fpureg ; D8 C0+r [8086,FPU]
2104
+ # FADD ST0,fpureg ; D8 C0+r [8086,FPU]
2105
+
2106
+ # FADD TO fpureg ; DC C0+r [8086,FPU]
2107
+ # FADD fpureg,ST0 ; DC C0+r [8086,FPU]
2108
+
2109
+ # FADDP fpureg ; DE C0+r [8086,FPU]
2110
+ # FADDP fpureg,ST0 ; DE C0+r [8086,FPU]
2111
+
2112
+ # |FADD|, given one operand, adds the operand to |ST0| and stores the
2113
+ # result back in |ST0|. If the operand has the |TO| modifier, the result
2114
+ # is stored in the register given rather than in |ST0|.
2115
+
2116
+ # |FADDP| performs the same function as |FADD TO|, but pops the register
2117
+ # stack after storing the result.
2118
+
2119
+ # The given two-operand forms are synonyms for the one-operand forms.
2120
+
2121
+
2122
+ # A.31 |FBLD|, |FBSTP|: BCD Floating-Point Load and Store
2123
+
2124
+ # FBLD mem80 ; DF /4 [8086,FPU]
2125
+ # FBSTP mem80 ; DF /6 [8086,FPU]
2126
+
2127
+ # |FBLD| loads an 80-bit (ten-byte) packed binary-coded decimal number
2128
+ # from the given memory address, converts it to a real, and pushes it on
2129
+ # the register stack. |FBSTP| stores the value of |ST0|, in packed BCD, at
2130
+ # the given address and then pops the register stack.
2131
+
2132
+
2133
+ # A.32 |FCHS|: Floating-Point Change Sign
2134
+
2135
+ # FCHS ; D9 E0 [8086,FPU]
2136
+
2137
+ # |FCHS| negates the number in |ST0|: negative numbers become positive,
2138
+ # and vice versa.
2139
+
2140
+
2141
+ # A.33 |FCLEX|, |FNCLEX|: Clear Floating-Point Exceptions
2142
+
2143
+ # FCLEX ; 9B DB E2 [8086,FPU]
2144
+ # FNCLEX ; DB E2 [8086,FPU]
2145
+
2146
+ # |FCLEX| clears any floating-point exceptions which may be pending.
2147
+ # |FNCLEX| does the same thing but doesn't wait for previous
2148
+ # floating-point operations (including the /handling/ of pending
2149
+ # exceptions) to finish first.
2150
+
2151
+
2152
+ # A.34 |FCMOVcc|: Floating-Point Conditional Move
2153
+
2154
+ # FCMOVB fpureg ; DA C0+r [P6,FPU]
2155
+ # FCMOVB ST0,fpureg ; DA C0+r [P6,FPU]
2156
+
2157
+ # FCMOVBE fpureg ; DA D0+r [P6,FPU]
2158
+ # FCMOVBE ST0,fpureg ; DA D0+r [P6,FPU]
2159
+
2160
+ # FCMOVE fpureg ; DA C8+r [P6,FPU]
2161
+ # FCMOVE ST0,fpureg ; DA C8+r [P6,FPU]
2162
+
2163
+ # FCMOVNB fpureg ; DB C0+r [P6,FPU]
2164
+ # FCMOVNB ST0,fpureg ; DB C0+r [P6,FPU]
2165
+
2166
+ # FCMOVNBE fpureg ; DB D0+r [P6,FPU]
2167
+ # FCMOVNBE ST0,fpureg ; DB D0+r [P6,FPU]
2168
+
2169
+ # FCMOVNE fpureg ; DB C8+r [P6,FPU]
2170
+ # FCMOVNE ST0,fpureg ; DB C8+r [P6,FPU]
2171
+
2172
+ # FCMOVNU fpureg ; DB D8+r [P6,FPU]
2173
+ # FCMOVNU ST0,fpureg ; DB D8+r [P6,FPU]
2174
+
2175
+ # FCMOVU fpureg ; DA D8+r [P6,FPU]
2176
+ # FCMOVU ST0,fpureg ; DA D8+r [P6,FPU]
2177
+
2178
+ # The |FCMOV| instructions perform conditional move operations: each of
2179
+ # them moves the contents of the given register into |ST0| if its
2180
+ # condition is satisfied, and does nothing if not.
2181
+
2182
+ # The conditions are not the same as the standard condition codes used
2183
+ # with conditional jump instructions. The conditions |B|, |BE|, |NB|,
2184
+ # |NBE|, |E| and |NE| are exactly as normal, but none of the other
2185
+ # standard ones are supported. Instead, the condition |U| and its
2186
+ # counterpart |NU| are provided; the |U| condition is satisfied if the
2187
+ # last two floating-point numbers compared were /unordered/, i.e. they
2188
+ # were not equal but neither one could be said to be greater than the
2189
+ # other, for example if they were NaNs. (The flag state which signals this
2190
+ # is the setting of the parity flag: so the |U| condition is notionally
2191
+ # equivalent to |PE|, and |NU| is equivalent to |PO|.)
2192
+
2193
+ # The |FCMOV| conditions test the main processor's status flags, not the
2194
+ # FPU status flags, so using |FCMOV| directly after |FCOM| will not work.
2195
+ # Instead, you should either use |FCOMI| which writes directly to the main
2196
+ # CPU flags word, or use |FSTSW| to extract the FPU flags.
2197
+
2198
+ # Although the |FCMOV| instructions are flagged |P6| above, they may not
2199
+ # be supported by all Pentium Pro processors; the |CPUID| instruction
2200
+ # (section A.22 <#section-A.22>) will return a bit which indicates whether
2201
+ # conditional moves are supported.
2202
+
2203
+
2204
+ # A.35 |FCOM|, |FCOMP|, |FCOMPP|, |FCOMI|, |FCOMIP|: Floating-Point
2205
+ # Compare
2206
+
2207
+ # FCOM mem32 ; D8 /2 [8086,FPU]
2208
+ # FCOM mem64 ; DC /2 [8086,FPU]
2209
+ # FCOM fpureg ; D8 D0+r [8086,FPU]
2210
+ # FCOM ST0,fpureg ; D8 D0+r [8086,FPU]
2211
+
2212
+ # FCOMP mem32 ; D8 /3 [8086,FPU]
2213
+ # FCOMP mem64 ; DC /3 [8086,FPU]
2214
+ # FCOMP fpureg ; D8 D8+r [8086,FPU]
2215
+ # FCOMP ST0,fpureg ; D8 D8+r [8086,FPU]
2216
+
2217
+ # FCOMPP ; DE D9 [8086,FPU]
2218
+
2219
+ # FCOMI fpureg ; DB F0+r [P6,FPU]
2220
+ # FCOMI ST0,fpureg ; DB F0+r [P6,FPU]
2221
+
2222
+ # FCOMIP fpureg ; DF F0+r [P6,FPU]
2223
+ # FCOMIP ST0,fpureg ; DF F0+r [P6,FPU]
2224
+
2225
+ # |FCOM| compares |ST0| with the given operand, and sets the FPU flags
2226
+ # accordingly. |ST0| is treated as the left-hand side of the comparison,
2227
+ # so that the carry flag is set (for a `less-than' result) if |ST0| is
2228
+ # less than the given operand.
2229
+
2230
+ # |FCOMP| does the same as |FCOM|, but pops the register stack afterwards.
2231
+ # |FCOMPP| compares |ST0| with |ST1| and then pops the register stack twice.
2232
+
2233
+ # |FCOMI| and |FCOMIP| work like the corresponding forms of |FCOM| and
2234
+ # |FCOMP|, but write their results directly to the CPU flags register
2235
+ # rather than the FPU status word, so they can be immediately followed by
2236
+ # conditional jump or conditional move instructions.
2237
+
2238
+ # The |FCOM| instructions differ from the |FUCOM| instructions (section
2239
+ # A.69 <#section-A.69>) only in the way they handle quiet NaNs: |FUCOM|
2240
+ # will handle them silently and set the condition code flags to an
2241
+ # `unordered' result, whereas |FCOM| will generate an exception.
2242
+
2243
+
2244
+ # A.36 |FCOS|: Cosine
2245
+
2246
+ # FCOS ; D9 FF [386,FPU]
2247
+
2248
+ # |FCOS| computes the cosine of |ST0| (in radians), and stores the result
2249
+ # in |ST0|. See also |FSINCOS| (section A.61 <#section-A.61>).
2250
+
2251
+
2252
+ # A.37 |FDECSTP|: Decrement Floating-Point Stack Pointer
2253
+
2254
+ # FDECSTP ; D9 F6 [8086,FPU]
2255
+
2256
+ # |FDECSTP| decrements the `top' field in the floating-point status word.
2257
+ # This has the effect of rotating the FPU register stack by one, as if the
2258
+ # contents of |ST7| had been pushed on the stack. See also |FINCSTP|
2259
+ # (section A.46 <#section-A.46>).
2260
+
2261
+
2262
+ # A.38 |FxDISI|, |FxENI|: Disable and Enable Floating-Point Interrupts
2263
+
2264
+ # FDISI ; 9B DB E1 [8086,FPU]
2265
+ # FNDISI ; DB E1 [8086,FPU]
2266
+
2267
+ # FENI ; 9B DB E0 [8086,FPU]
2268
+ # FNENI ; DB E0 [8086,FPU]
2269
+
2270
+ # |FDISI| and |FENI| disable and enable floating-point interrupts. These
2271
+ # instructions are only meaningful on original 8087 processors: the 287
2272
+ # and above treat them as no-operation instructions.
2273
+
2274
+ # |FNDISI| and |FNENI| do the same thing as |FDISI| and |FENI|
2275
+ # respectively, but without waiting for the floating-point processor to
2276
+ # finish what it was doing first.
2277
+
2278
+
2279
+ # A.39 |FDIV|, |FDIVP|, |FDIVR|, |FDIVRP|: Floating-Point Division
2280
+
2281
+ # FDIV mem32 ; D8 /6 [8086,FPU]
2282
+ # FDIV mem64 ; DC /6 [8086,FPU]
2283
+
2284
+ # FDIV fpureg ; D8 F0+r [8086,FPU]
2285
+ # FDIV ST0,fpureg ; D8 F0+r [8086,FPU]
2286
+
2287
+ # FDIV TO fpureg ; DC F8+r [8086,FPU]
2288
+ # FDIV fpureg,ST0 ; DC F8+r [8086,FPU]
2289
+
2290
+ # FDIVR mem32 ; D8 /7 [8086,FPU]
2291
+ # FDIVR mem64 ; DC /7 [8086,FPU]
2292
+
2293
+ # FDIVR fpureg ; D8 F8+r [8086,FPU]
2294
+ # FDIVR ST0,fpureg ; D8 F8+r [8086,FPU]
2295
+
2296
+ # FDIVR TO fpureg ; DC F0+r [8086,FPU]
2297
+ # FDIVR fpureg,ST0 ; DC F0+r [8086,FPU]
2298
+
2299
+ # FDIVP fpureg ; DE F8+r [8086,FPU]
2300
+ # FDIVP fpureg,ST0 ; DE F8+r [8086,FPU]
2301
+
2302
+ # FDIVRP fpureg ; DE F0+r [8086,FPU]
2303
+ # FDIVRP fpureg,ST0 ; DE F0+r [8086,FPU]
2304
+
2305
+ # |FDIV| divides |ST0| by the given operand and stores the result back in
2306
+ # |ST0|, unless the |TO| qualifier is given, in which case it divides the
2307
+ # given operand by |ST0| and stores the result in the operand.
2308
+
2309
+ # |FDIVR| does the same thing, but does the division the other way up: so
2310
+ # if |TO| is not given, it divides the given operand by |ST0| and stores
2311
+ # the result in |ST0|, whereas if |TO| is given it divides |ST0| by its
2312
+ # operand and stores the result in the operand.
2313
+
2314
+ # |FDIVP| operates like |FDIV TO|, but pops the register stack once it has
2315
+ # finished. |FDIVRP| operates like |FDIVR TO|, but pops the register stack
2316
+ # once it has finished.
2317
+
2318
+
2319
+ # A.40 |FFREE|: Flag Floating-Point Register as Unused
2320
+
2321
+ # FFREE fpureg ; DD C0+r [8086,FPU]
2322
+
2323
+ # |FFREE| marks the given register as being empty.
2324
+
2325
+
2326
+ # A.41 |FIADD|: Floating-Point/Integer Addition
2327
+
2328
+ # FIADD mem16 ; DE /0 [8086,FPU]
2329
+ # FIADD mem32 ; DA /0 [8086,FPU]
2330
+
2331
+ # |FIADD| adds the 16-bit or 32-bit integer stored in the given memory
2332
+ # location to |ST0|, storing the result in |ST0|.
2333
+
2334
+
2335
+ # A.42 |FICOM|, |FICOMP|: Floating-Point/Integer Compare
2336
+
2337
+ # FICOM mem16 ; DE /2 [8086,FPU]
2338
+ # FICOM mem32 ; DA /2 [8086,FPU]
2339
+
2340
+ # FICOMP mem16 ; DE /3 [8086,FPU]
2341
+ # FICOMP mem32 ; DA /3 [8086,FPU]
2342
+
2343
+ # |FICOM| compares |ST0| with the 16-bit or 32-bit integer stored in the
2344
+ # given memory location, and sets the FPU flags accordingly. |FICOMP| does
2345
+ # the same, but pops the register stack afterwards.
2346
+
2347
+
2348
+ # A.43 |FIDIV|, |FIDIVR|: Floating-Point/Integer Division
2349
+
2350
+ # FIDIV mem16 ; DE /6 [8086,FPU]
2351
+ # FIDIV mem32 ; DA /6 [8086,FPU]
2352
+
2353
+ # FIDIVR mem16 ; DE /7 [8086,FPU]
2354
+ # FIDIVR mem32 ; DA /7 [8086,FPU]
2355
+
2356
+ # |FIDIV| divides |ST0| by the 16-bit or 32-bit integer stored in the
2357
+ # given memory location, and stores the result in |ST0|. |FIDIVR| does the
2358
+ # division the other way up: it divides the integer by |ST0|, but still
2359
+ # stores the result in |ST0|.
2360
+
2361
+
2362
+ # A.44 |FILD|, |FIST|, |FISTP|: Floating-Point/Integer Conversion
2363
+
2364
+ # FILD mem16 ; DF /0 [8086,FPU]
2365
+ # FILD mem32 ; DB /0 [8086,FPU]
2366
+ # FILD mem64 ; DF /5 [8086,FPU]
2367
+
2368
+ # FIST mem16 ; DF /2 [8086,FPU]
2369
+ # FIST mem32 ; DB /2 [8086,FPU]
2370
+
2371
+ # FISTP mem16 ; DF /3 [8086,FPU]
2372
+ # FISTP mem32 ; DB /3 [8086,FPU]
2373
+ # FISTP mem64 ; DF /7 [8086,FPU]
2374
+
2375
+ # |FILD| loads an integer out of a memory location, converts it to a real,
2376
+ # and pushes it on the FPU register stack. |FIST| converts |ST0| to an
2377
+ # integer and stores that in memory; |FISTP| does the same as |FIST|, but
2378
+ # pops the register stack afterwards.
2379
+
2380
+
2381
+ # A.45 |FIMUL|: Floating-Point/Integer Multiplication
2382
+
2383
+ # FIMUL mem16 ; DE /1 [8086,FPU]
2384
+ # FIMUL mem32 ; DA /1 [8086,FPU]
2385
+
2386
+ # |FIMUL| multiplies |ST0| by the 16-bit or 32-bit integer stored in the
2387
+ # given memory location, and stores the result in |ST0|.
2388
+
2389
+
2390
+ # A.46 |FINCSTP|: Increment Floating-Point Stack Pointer
2391
+
2392
+ # FINCSTP ; D9 F7 [8086,FPU]
2393
+
2394
+ # |FINCSTP| increments the `top' field in the floating-point status word.
2395
+ # This has the effect of rotating the FPU register stack by one, as if the
2396
+ # register stack had been popped; however, unlike the popping of the stack
2397
+ # performed by many FPU instructions, it does not flag the new |ST7|
2398
+ # (previously |ST0|) as empty. See also |FDECSTP| (section A.37
2399
+ # <#section-A.37>).
2400
+
2401
+
2402
+ # A.47 |FINIT|, |FNINIT|: Initialise Floating-Point Unit
2403
+
2404
+ # FINIT ; 9B DB E3 [8086,FPU]
2405
+ # FNINIT ; DB E3 [8086,FPU]
2406
+
2407
+ # |FINIT| initialises the FPU to its default state. It flags all registers
2408
+ # as empty, though it does not actually change their values. |FNINIT| does
2409
+ # the same, without first waiting for pending exceptions to clear.
2410
+
2411
+
2412
+ # A.48 |FISUB|, |FISUBR|: Floating-Point/Integer Subtraction
2413
+
2414
+ # FISUB mem16 ; DE /4 [8086,FPU]
2415
+ # FISUB mem32 ; DA /4 [8086,FPU]
2416
+
2417
+ # FISUBR mem16 ; DE /5 [8086,FPU]
2418
+ # FISUBR mem32 ; DA /5 [8086,FPU]
2419
+
2420
+ # |FISUB| subtracts the 16-bit or 32-bit integer stored in the given
2421
+ # memory location from |ST0|, and stores the result in |ST0|. |FISUBR|
2422
+ # does the subtraction the other way round, i.e. it subtracts |ST0| from
2423
+ # the given integer, but still stores the result in |ST0|.
2424
+
2425
+
2426
+ # A.49 |FLD|: Floating-Point Load
2427
+
2428
+ # FLD mem32 ; D9 /0 [8086,FPU]
2429
+ # FLD mem64 ; DD /0 [8086,FPU]
2430
+ # FLD mem80 ; DB /5 [8086,FPU]
2431
+ # FLD fpureg ; D9 C0+r [8086,FPU]
2432
+
2433
+ # |FLD| loads a floating-point value out of the given register or memory
2434
+ # location, and pushes it on the FPU register stack.
2435
+
2436
+
2437
+ # A.50 |FLDxx|: Floating-Point Load Constants
2438
+
2439
+ # FLD1 ; D9 E8 [8086,FPU]
2440
+ # FLDL2E ; D9 EA [8086,FPU]
2441
+ # FLDL2T ; D9 E9 [8086,FPU]
2442
+ # FLDLG2 ; D9 EC [8086,FPU]
2443
+ # FLDLN2 ; D9 ED [8086,FPU]
2444
+ # FLDPI ; D9 EB [8086,FPU]
2445
+ # FLDZ ; D9 EE [8086,FPU]
2446
+
2447
+ # These instructions push specific standard constants on the FPU register
2448
+ # stack. |FLD1| pushes the value 1; |FLDL2E| pushes the base-2 logarithm
2449
+ # of e; |FLDL2T| pushes the base-2 log of 10; |FLDLG2| pushes the base-10
2450
+ # log of 2; |FLDLN2| pushes the base-e log of 2; |FLDPI| pushes pi; and
2451
+ # |FLDZ| pushes zero.
2452
+
2453
+
2454
+ # A.51 |FLDCW|: Load Floating-Point Control Word
2455
+
2456
+ # FLDCW mem16 ; D9 /5 [8086,FPU]
2457
+
2458
+ # |FLDCW| loads a 16-bit value out of memory and stores it into the FPU
2459
+ # control word (governing things like the rounding mode, the precision,
2460
+ # and the exception masks). See also |FSTCW| (section A.64 <#section-A.64>).
2461
+
2462
+
2463
+ # A.52 |FLDENV|: Load Floating-Point Environment
2464
+
2465
+ # FLDENV mem ; D9 /4 [8086,FPU]
2466
+
2467
+ # |FLDENV| loads the FPU operating environment (control word, status word,
2468
+ # tag word, instruction pointer, data pointer and last opcode) from
2469
+ # memory. The memory area is 14 or 28 bytes long, depending on the CPU
2470
+ # mode at the time. See also |FSTENV| (section A.65 <#section-A.65>).
2471
+
2472
+
2473
+ # A.53 |FMUL|, |FMULP|: Floating-Point Multiply
2474
+
2475
+ # FMUL mem32 ; D8 /1 [8086,FPU]
2476
+ # FMUL mem64 ; DC /1 [8086,FPU]
2477
+
2478
+ # FMUL fpureg ; D8 C8+r [8086,FPU]
2479
+ # FMUL ST0,fpureg ; D8 C8+r [8086,FPU]
2480
+
2481
+ # FMUL TO fpureg ; DC C8+r [8086,FPU]
2482
+ # FMUL fpureg,ST0 ; DC C8+r [8086,FPU]
2483
+
2484
+ # FMULP fpureg ; DE C8+r [8086,FPU]
2485
+ # FMULP fpureg,ST0 ; DE C8+r [8086,FPU]
2486
+
2487
+ # |FMUL| multiplies |ST0| by the given operand, and stores the result in
2488
+ # |ST0|, unless the |TO| qualifier is used in which case it stores the
2489
+ # result in the operand. |FMULP| performs the same operation as |FMUL TO|,
2490
+ # and then pops the register stack.
2491
+
2492
+
2493
+ # A.54 |FNOP|: Floating-Point No Operation
2494
+
2495
+ # FNOP ; D9 D0 [8086,FPU]
2496
+
2497
+ # |FNOP| does nothing.
2498
+
2499
+
2500
+ # A.55 |FPATAN|, |FPTAN|: Arctangent and Tangent
2501
+
2502
+ # FPATAN ; D9 F3 [8086,FPU]
2503
+ # FPTAN ; D9 F2 [8086,FPU]
2504
+
2505
+ # |FPATAN| computes the arctangent, in radians, of the result of dividing
2506
+ # |ST1| by |ST0|, stores the result in |ST1|, and pops the register stack.
2507
+ # It works like the C |atan2| function, in that changing the sign of both
2508
+ # |ST0| and |ST1| changes the output value by pi (so it performs true
2509
+ # rectangular-to-polar coordinate conversion, with |ST1| being the Y
2510
+ # coordinate and |ST0| being the X coordinate, not merely an arctangent).
2511
+
2512
+ # |FPTAN| computes the tangent of the value in |ST0| (in radians), and
2513
+ # stores the result back into |ST0|.
2514
+
2515
+
2516
+ # A.56 |FPREM|, |FPREM1|: Floating-Point Partial Remainder
2517
+
2518
+ # FPREM ; D9 F8 [8086,FPU]
2519
+ # FPREM1 ; D9 F5 [386,FPU]
2520
+
2521
+ # These instructions both produce the remainder obtained by dividing |ST0|
2522
+ # by |ST1|. This is calculated, notionally, by dividing |ST0| by |ST1|,
2523
+ # rounding the result to an integer, multiplying by |ST1| again, and
2524
+ # computing the value which would need to be added back on to the result
2525
+ # to get back to the original value in |ST0|.
2526
+
2527
+ # The two instructions differ in the way the notional round-to-integer
2528
+ # operation is performed. |FPREM| does it by rounding towards zero, so
2529
+ # that the remainder it returns always has the same sign as the original
2530
+ # value in |ST0|; |FPREM1| does it by rounding to the nearest integer, so
2531
+ # that the remainder always has at most half the magnitude of |ST1|.
2532
+
2533
+ # Both instructions calculate /partial/ remainders, meaning that they may
2534
+ # not manage to provide the final result, but might leave intermediate
2535
+ # results in |ST0| instead. If this happens, they will set the C2 flag in
2536
+ # the FPU status word; therefore, to calculate a remainder, you should
2537
+ # repeatedly execute |FPREM| or |FPREM1| until C2 becomes clear.
2538
+
2539
+
2540
+ # A.57 |FRNDINT|: Floating-Point Round to Integer
2541
+
2542
+ # FRNDINT ; D9 FC [8086,FPU]
2543
+
2544
+ # |FRNDINT| rounds the contents of |ST0| to an integer, according to the
2545
+ # current rounding mode set in the FPU control word, and stores the result
2546
+ # back in |ST0|.
2547
+
2548
+
2549
+ # A.58 |FSAVE|, |FRSTOR|: Save/Restore Floating-Point State
2550
+
2551
+ # FSAVE mem ; 9B DD /6 [8086,FPU]
2552
+ # FNSAVE mem ; DD /6 [8086,FPU]
2553
+
2554
+ # FRSTOR mem ; DD /4 [8086,FPU]
2555
+
2556
+ # |FSAVE| saves the entire floating-point unit state, including all the
2557
+ # information saved by |FSTENV| (section A.65 <#section-A.65>) plus the
2558
+ # contents of all the registers, to a 94 or 108 byte area of memory
2559
+ # (depending on the CPU mode). |FRSTOR| restores the floating-point state
2560
+ # from the same area of memory.
2561
+
2562
+ # |FNSAVE| does the same as |FSAVE|, without first waiting for pending
2563
+ # floating-point exceptions to clear.
2564
+
2565
+
2566
+ # A.59 |FSCALE|: Scale Floating-Point Value by Power of Two
2567
+
2568
+ # FSCALE ; D9 FD [8086,FPU]
2569
+
2570
+ # |FSCALE| scales a number by a power of two: it rounds |ST1| towards zero
2571
+ # to obtain an integer, then multiplies |ST0| by two to the power of that
2572
+ # integer, and stores the result in |ST0|.
2573
+
2574
+
2575
+ # A.60 |FSETPM|: Set Protected Mode
2576
+
2577
+ # FSETPM ; DB E4 [286,FPU]
2578
+
2579
+ # This instruction initialises protected mode on the 287 floating-point
2580
+ # coprocessor. It is only meaningful on that processor: the 387 and above
2581
+ # treat the instruction as a no-operation.
2582
+
2583
+
2584
+ # A.61 |FSIN|, |FSINCOS|: Sine and Cosine
2585
+
2586
+ # FSIN ; D9 FE [386,FPU]
2587
+ # FSINCOS ; D9 FB [386,FPU]
2588
+
2589
+ # |FSIN| calculates the sine of |ST0| (in radians) and stores the result
2590
+ # in |ST0|. |FSINCOS| does the same, but then pushes the cosine of the
2591
+ # same value on the register stack, so that the sine ends up in |ST1| and
2592
+ # the cosine in |ST0|. |FSINCOS| is faster than executing |FSIN| and
2593
+ # |FCOS| (see section A.36 <#section-A.36>) in succession.
2594
+
2595
+
2596
+ # A.62 |FSQRT|: Floating-Point Square Root
2597
+
2598
+ # FSQRT ; D9 FA [8086,FPU]
2599
+
2600
+ # |FSQRT| calculates the square root of |ST0| and stores the result in |ST0|.
2601
+
2602
+
2603
+ # A.63 |FST|, |FSTP|: Floating-Point Store
2604
+
2605
+ # FST mem32 ; D9 /2 [8086,FPU]
2606
+ # FST mem64 ; DD /2 [8086,FPU]
2607
+ # FST fpureg ; DD D0+r [8086,FPU]
2608
+
2609
+ # FSTP mem32 ; D9 /3 [8086,FPU]
2610
+ # FSTP mem64 ; DD /3 [8086,FPU]
2611
+ # FSTP mem80 ; DB /7 [8086,FPU]
2612
+ # FSTP fpureg ; DD D8+r [8086,FPU]
2613
+
2614
+ # |FST| stores the value in |ST0| into the given memory location or other
2615
+ # FPU register. |FSTP| does the same, but then pops the register stack.
2616
+
2617
+
2618
+ # A.64 |FSTCW|: Store Floating-Point Control Word
2619
+
2620
+ # FSTCW mem16 ; 9B D9 /7 [8086,FPU]
2621
+ # FNSTCW mem16 ; D9 /7 [8086,FPU]
2622
+
2623
+ # |FSTCW| stores the FPU control word (governing things like the rounding
2624
+ # mode, the precision, and the exception masks) into a 2-byte memory area.
2625
+ # See also |FLDCW| (section A.51 <#section-A.51>).
2626
+
2627
+ # |FNSTCW| does the same thing as |FSTCW|, without first waiting for
2628
+ # pending floating-point exceptions to clear.
2629
+
2630
+
2631
+ # A.65 |FSTENV|: Store Floating-Point Environment
2632
+
2633
+ # FSTENV mem ; 9B D9 /6 [8086,FPU]
2634
+ # FNSTENV mem ; D9 /6 [8086,FPU]
2635
+
2636
+ # |FSTENV| stores the FPU operating environment (control word, status
2637
+ # word, tag word, instruction pointer, data pointer and last opcode) into
2638
+ # memory. The memory area is 14 or 28 bytes long, depending on the CPU
2639
+ # mode at the time. See also |FLDENV| (section A.52 <#section-A.52>).
2640
+
2641
+ # |FNSTENV| does the same thing as |FSTENV|, without first waiting for
2642
+ # pending floating-point exceptions to clear.
2643
+
2644
+
2645
+ # A.66 |FSTSW|: Store Floating-Point Status Word
2646
+
2647
+ # FSTSW mem16 ; 9B DD /7 [8086,FPU]
2648
+ # FSTSW AX ; 9B DF E0 [286,FPU]
2649
+
2650
+ # FNSTSW mem16 ; DD /7 [8086,FPU]
2651
+ # FNSTSW AX ; DF E0 [286,FPU]
2652
+
2653
+ # |FSTSW| stores the FPU status word into |AX| or into a 2-byte memory area.
2654
+
2655
+ # |FNSTSW| does the same thing as |FSTSW|, without first waiting for
2656
+ # pending floating-point exceptions to clear.
2657
+
2658
+
2659
+ # A.67 |FSUB|, |FSUBP|, |FSUBR|, |FSUBRP|: Floating-Point Subtract
2660
+
2661
+ # FSUB mem32 ; D8 /4 [8086,FPU]
2662
+ # FSUB mem64 ; DC /4 [8086,FPU]
2663
+
2664
+ # FSUB fpureg ; D8 E0+r [8086,FPU]
2665
+ # FSUB ST0,fpureg ; D8 E0+r [8086,FPU]
2666
+
2667
+ # FSUB TO fpureg ; DC E8+r [8086,FPU]
2668
+ # FSUB fpureg,ST0 ; DC E8+r [8086,FPU]
2669
+
2670
+ # FSUBR mem32 ; D8 /5 [8086,FPU]
2671
+ # FSUBR mem64 ; DC /5 [8086,FPU]
2672
+
2673
+ # FSUBR fpureg ; D8 E8+r [8086,FPU]
2674
+ # FSUBR ST0,fpureg ; D8 E8+r [8086,FPU]
2675
+
2676
+ # FSUBR TO fpureg ; DC E0+r [8086,FPU]
2677
+ # FSUBR fpureg,ST0 ; DC E0+r [8086,FPU]
2678
+
2679
+ # FSUBP fpureg ; DE E8+r [8086,FPU]
2680
+ # FSUBP fpureg,ST0 ; DE E8+r [8086,FPU]
2681
+
2682
+ # FSUBRP fpureg ; DE E0+r [8086,FPU]
2683
+ # FSUBRP fpureg,ST0 ; DE E0+r [8086,FPU]
2684
+
2685
+ # |FSUB| subtracts the given operand from |ST0| and stores the result back
2686
+ # in |ST0|, unless the |TO| qualifier is given, in which case it subtracts
2687
+ # |ST0| from the given operand and stores the result in the operand.
2688
+
2689
+ # |FSUBR| does the same thing, but does the subtraction the other way up:
2690
+ # so if |TO| is not given, it subtracts |ST0| from the given operand and
2691
+ # stores the result in |ST0|, whereas if |TO| is given it subtracts its
2692
+ # operand from |ST0| and stores the result in the operand.
2693
+
2694
+ # |FSUBP| operates like |FSUB TO|, but pops the register stack once it has
2695
+ # finished. |FSUBRP| operates like |FSUBR TO|, but pops the register stack
2696
+ # once it has finished.
2697
+
2698
+
2699
+ # A.68 |FTST|: Test |ST0| Against Zero
2700
+
2701
+ # FTST ; D9 E4 [8086,FPU]
2702
+
2703
+ # |FTST| compares |ST0| with zero and sets the FPU flags accordingly.
2704
+ # |ST0| is treated as the left-hand side of the comparison, so that a
2705
+ # `less-than' result is generated if |ST0| is negative.
2706
+
2707
+
2708
+ # A.69 |FUCOMxx|: Floating-Point Unordered Compare
2709
+
2710
+ # FUCOM fpureg ; DD E0+r [386,FPU]
2711
+ # FUCOM ST0,fpureg ; DD E0+r [386,FPU]
2712
+
2713
+ # FUCOMP fpureg ; DD E8+r [386,FPU]
2714
+ # FUCOMP ST0,fpureg ; DD E8+r [386,FPU]
2715
+
2716
+ # FUCOMPP ; DA E9 [386,FPU]
2717
+
2718
+ # FUCOMI fpureg ; DB E8+r [P6,FPU]
2719
+ # FUCOMI ST0,fpureg ; DB E8+r [P6,FPU]
2720
+
2721
+ # FUCOMIP fpureg ; DF E8+r [P6,FPU]
2722
+ # FUCOMIP ST0,fpureg ; DF E8+r [P6,FPU]
2723
+
2724
+ # |FUCOM| compares |ST0| with the given operand, and sets the FPU flags
2725
+ # accordingly. |ST0| is treated as the left-hand side of the comparison,
2726
+ # so that the carry flag is set (for a `less-than' result) if |ST0| is
2727
+ # less than the given operand.
2728
+
2729
+ # |FUCOMP| does the same as |FUCOM|, but pops the register stack
2730
+ # afterwards. |FUCOMPP| compares |ST0| with |ST1| and then pops the
2731
+ # register stack twice.
2732
+
2733
+ # |FUCOMI| and |FUCOMIP| work like the corresponding forms of |FUCOM| and
2734
+ # |FUCOMP|, but write their results directly to the CPU flags register
2735
+ # rather than the FPU status word, so they can be immediately followed by
2736
+ # conditional jump or conditional move instructions.
2737
+
2738
+ # The |FUCOM| instructions differ from the |FCOM| instructions (section
2739
+ # A.35 <#section-A.35>) only in the way they handle quiet NaNs: |FUCOM|
2740
+ # will handle them silently and set the condition code flags to an
2741
+ # `unordered' result, whereas |FCOM| will generate an exception.
2742
+
2743
+
2744
+ # A.70 |FXAM|: Examine Class of Value in |ST0|
2745
+
2746
+ # FXAM ; D9 E5 [8086,FPU]
2747
+
2748
+ # |FXAM| sets the FPU flags C3, C2 and C0 depending on the type of value
2749
+ # stored in |ST0|: 000 (respectively) for an unsupported format, 001 for a
2750
+ # NaN, 010 for a normal finite number, 011 for an infinity, 100 for a
2751
+ # zero, 101 for an empty register, and 110 for a denormal. It also sets
2752
+ # the C1 flag to the sign of the number.
2753
+
2754
+
2755
+ # A.71 |FXCH|: Floating-Point Exchange
2756
+
2757
+ # FXCH ; D9 C9 [8086,FPU]
2758
+ # FXCH fpureg ; D9 C8+r [8086,FPU]
2759
+ # FXCH fpureg,ST0 ; D9 C8+r [8086,FPU]
2760
+ # FXCH ST0,fpureg ; D9 C8+r [8086,FPU]
2761
+
2762
+ # |FXCH| exchanges |ST0| with a given FPU register. The no-operand form
2763
+ # exchanges |ST0| with |ST1|.
2764
+
2765
+
2766
+ # A.72 |FXTRACT|: Extract Exponent and Significand
2767
+
2768
+ # FXTRACT ; D9 F4 [8086,FPU]
2769
+
2770
+ # |FXTRACT| separates the number in |ST0| into its exponent and
2771
+ # significand (mantissa), stores the exponent back into |ST0|, and then
2772
+ # pushes the significand on the register stack (so that the significand
2773
+ # ends up in |ST0|, and the exponent in |ST1|).
2774
+
2775
+
2776
+ # A.73 |FYL2X|, |FYL2XP1|: Compute Y times Log2(X) or Log2(X+1)
2777
+
2778
+ # FYL2X ; D9 F1 [8086,FPU]
2779
+ # FYL2XP1 ; D9 F9 [8086,FPU]
2780
+
2781
+ # |FYL2X| multiplies |ST1| by the base-2 logarithm of |ST0|, stores the
2782
+ # result in |ST1|, and pops the register stack (so that the result ends up
2783
+ # in |ST0|). |ST0| must be non-zero and positive.
2784
+
2785
+ # |FYL2XP1| works the same way, but replacing the base-2 log of |ST0| with
2786
+ # that of |ST0| plus one. This time, |ST0| must have magnitude no greater
2787
+ # than 1 minus half the square root of two.
2788
+
2789
+
2790
+ # A.74 |HLT|: Halt Processor
2791
+
2792
+ # HLT ; F4 [8086]
2793
+
2794
+ # |HLT| puts the processor into a halted state, where it will perform no
2795
+ # more operations until restarted by an interrupt or a reset.
2796
+
2797
+
2798
+ # A.75 |IBTS|: Insert Bit String
2799
+
2800
+ # IBTS r/m16,reg16 ; o16 0F A7 /r [386,UNDOC]
2801
+ # IBTS r/m32,reg32 ; o32 0F A7 /r [386,UNDOC]
2802
+
2803
+ # No clear documentation seems to be available for this instruction: the
2804
+ # best I've been able to find reads `Takes a string of bits from the
2805
+ # second operand and puts them in the first operand'. It is present only
2806
+ # in early 386 processors, and conflicts with the opcodes for
2807
+ # |CMPXCHG486|. NASM supports it only for completeness. Its counterpart is
2808
+ # |XBTS| (see section A.167 <#section-A.167>).
2809
+
2810
+
2811
+ # A.76 |IDIV|: Signed Integer Divide
2812
+
2813
+ # IDIV r/m8 ; F6 /7 [8086]
2814
+ # IDIV r/m16 ; o16 F7 /7 [8086]
2815
+ # IDIV r/m32 ; o32 F7 /7 [386]
2816
+
2817
+ # |IDIV| performs signed integer division. The explicit operand provided
2818
+ # is the divisor; the dividend and destination operands are implicit, in
2819
+ # the following way:
2820
+
2821
+ # * For |IDIV r/m8|, |AX| is divided by the given operand; the
2822
+ # quotient is stored in |AL| and the remainder in |AH|.
2823
+ # * For |IDIV r/m16|, |DX:AX| is divided by the given operand; the
2824
+ # quotient is stored in |AX| and the remainder in |DX|.
2825
+ # * For |IDIV r/m32|, |EDX:EAX| is divided by the given operand; the
2826
+ # quotient is stored in |EAX| and the remainder in |EDX|.
2827
+
2828
+ # Unsigned integer division is performed by the |DIV| instruction: see
2829
+ # section A.25 <#section-A.25>.
2830
+
2831
+
2832
+ # A.77 |IMUL|: Signed Integer Multiply
2833
+
2834
+ # IMUL r/m8 ; F6 /5 [8086]
2835
+ # IMUL r/m16 ; o16 F7 /5 [8086]
2836
+ # IMUL r/m32 ; o32 F7 /5 [386]
2837
+
2838
+ # IMUL reg16,r/m16 ; o16 0F AF /r [386]
2839
+ # IMUL reg32,r/m32 ; o32 0F AF /r [386]
2840
+
2841
+ # IMUL reg16,imm8 ; o16 6B /r ib [286]
2842
+ # IMUL reg16,imm16 ; o16 69 /r iw [286]
2843
+ # IMUL reg32,imm8 ; o32 6B /r ib [386]
2844
+ # IMUL reg32,imm32 ; o32 69 /r id [386]
2845
+
2846
+ # IMUL reg16,r/m16,imm8 ; o16 6B /r ib [286]
2847
+ # IMUL reg16,r/m16,imm16 ; o16 69 /r iw [286]
2848
+ # IMUL reg32,r/m32,imm8 ; o32 6B /r ib [386]
2849
+ # IMUL reg32,r/m32,imm32 ; o32 69 /r id [386]
2850
+
2851
+ # |IMUL| performs signed integer multiplication. For the single-operand
2852
+ # form, the other operand and destination are implicit, in the following way:
2853
+
2854
+ # * For |IMUL r/m8|, |AL| is multiplied by the given operand; the
2855
+ # product is stored in |AX|.
2856
+ # * For |IMUL r/m16|, |AX| is multiplied by the given operand; the
2857
+ # product is stored in |DX:AX|.
2858
+ # * For |IMUL r/m32|, |EAX| is multiplied by the given operand; the
2859
+ # product is stored in |EDX:EAX|.
2860
+
2861
+ # The two-operand form multiplies its two operands and stores the result
2862
+ # in the destination (first) operand. The three-operand form multiplies
2863
+ # its last two operands and stores the result in the first operand.
2864
+
2865
+ # The two-operand form is in fact a shorthand for the three-operand form,
2866
+ # as can be seen by examining the opcode descriptions: in the two-operand
2867
+ # form, the code |/r| takes both its register and |r/m| parts from the
2868
+ # same operand (the first one).
2869
+
2870
+ # In the forms with an 8-bit immediate operand and another longer source
2871
+ # operand, the immediate operand is considered to be signed, and is
2872
+ # sign-extended to the length of the other source operand. In these cases,
2873
+ # the |BYTE| qualifier is necessary to force NASM to generate this form of
2874
+ # the instruction.
2875
+
2876
+ # Unsigned integer multiplication is performed by the |MUL| instruction:
2877
+ # see section A.107 <#section-A.107>.
2878
+
2879
+
2880
+ # A.78 |IN|: Input from I/O Port
2881
+
2882
+ # IN AL,imm8 ; E4 ib [8086]
2883
+ # IN AX,imm8 ; o16 E5 ib [8086]
2884
+ # IN EAX,imm8 ; o32 E5 ib [386]
2885
+ # IN AL,DX ; EC [8086]
2886
+ # IN AX,DX ; o16 ED [8086]
2887
+ # IN EAX,DX ; o32 ED [386]
2888
+
2889
+ # |IN| reads a byte, word or doubleword from the specified I/O port, and
2890
+ # stores it in the given destination register. The port number may be
2891
+ # specified as an immediate value if it is between 0 and 255, and
2892
+ # otherwise must be stored in |DX|. See also |OUT| (section A.111
2893
+ # <#section-A.111>).
2894
+
2895
+
2896
+ # A.79 |INC|: Increment Integer
2897
+
2898
+ # INC reg16 ; o16 40+r [8086]
2899
+ # INC reg32 ; o32 40+r [386]
2900
+ # INC r/m8 ; FE /0 [8086]
2901
+ # INC r/m16 ; o16 FF /0 [8086]
2902
+ # INC r/m32 ; o32 FF /0 [386]
2903
+
2904
+ # |INC| adds 1 to its operand. It does /not/ affect the carry flag: to
2905
+ # affect the carry flag, use |ADD something,1| (see section A.6
2906
+ # <#section-A.6>). See also |DEC| (section A.24 <#section-A.24>).
2907
+
2908
+
2909
+ # A.80 |INSB|, |INSW|, |INSD|: Input String from I/O Port
2910
+
2911
+ # INSB ; 6C [186]
2912
+ # INSW ; o16 6D [186]
2913
+ # INSD ; o32 6D [386]
2914
+
2915
+ # |INSB| inputs a byte from the I/O port specified in |DX| and stores it
2916
+ # at |[ES:DI]| or |[ES:EDI]|. It then increments or decrements (depending
2917
+ # on the direction flag: increments if the flag is clear, decrements if it
2918
+ # is set) |DI| or |EDI|.
2919
+
2920
+ # The register used is |DI| if the address size is 16 bits, and |EDI| if
2921
+ # it is 32 bits. If you need to use an address size not equal to the
2922
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
2923
+
2924
+ # Segment override prefixes have no effect for this instruction: the use
2925
+ # of |ES| for the store to |[DI]| or |[EDI]| cannot be overridden.
2926
+
2927
+ # |INSW| and |INSD| work in the same way, but they input a word or a
2928
+ # doubleword instead of a byte, and increment or decrement the addressing
2929
+ # register by 2 or 4 instead of 1.
2930
+
2931
+ # The |REP| prefix may be used to repeat the instruction |CX| (or |ECX| -
2932
+ # again, the address size chooses which) times.
2933
+
2934
+ # See also |OUTSB|, |OUTSW| and |OUTSD| (section A.112 <#section-A.112>).
2935
+
2936
+
2937
+ # A.81 |INT|: Software Interrupt
2938
+
2939
+ # INT imm8 ; CD ib [8086]
2940
+
2941
+ # |INT| causes a software interrupt through a specified vector number from
2942
+ # 0 to 255.
2943
+
2944
+ # The code generated by the |INT| instruction is always two bytes long:
2945
+ # although there are short forms for some |INT| instructions, NASM does
2946
+ # not generate them when it sees the |INT| mnemonic. In order to generate
2947
+ # single-byte breakpoint instructions, use the |INT3| or |INT1|
2948
+ # instructions (see section A.82 <#section-A.82>) instead.
2949
+
2950
+
2951
+ # A.82 |INT3|, |INT1|, |ICEBP|, |INT01|: Breakpoints
2952
+
2953
+ # INT1 ; F1 [P6]
2954
+ # ICEBP ; F1 [P6]
2955
+ # INT01 ; F1 [P6]
2956
+
2957
+ # INT3 ; CC [8086]
2958
+
2959
+ # |INT1| and |INT3| are short one-byte forms of the instructions |INT 1|
2960
+ # and |INT 3| (see section A.81 <#section-A.81>). They perform a similar
2961
+ # function to their longer counterparts, but take up less code space. They
2962
+ # are used as breakpoints by debuggers.
2963
+
2964
+ # |INT1|, and its alternative synonyms |INT01| and |ICEBP|, is an
2965
+ # instruction used by in-circuit emulators (ICEs). It is present, though
2966
+ # not documented, on some processors down to the 286, but is only
2967
+ # documented for the Pentium Pro. |INT3| is the instruction normally used
2968
+ # as a breakpoint by debuggers.
2969
+
2970
+ # |INT3| is not precisely equivalent to |INT 3|: the short form, since it
2971
+ # is designed to be used as a breakpoint, bypasses the normal IOPL checks
2972
+ # in virtual-8086 mode, and also does not go through interrupt redirection.
2973
+
2974
+
2975
+ # A.83 |INTO|: Interrupt if Overflow
2976
+
2977
+ # INTO ; CE [8086]
2978
+
2979
+ # |INTO| performs an |INT 4| software interrupt (see section A.81
2980
+ # <#section-A.81>) if and only if the overflow flag is set.
2981
+
2982
+
2983
+ # A.84 |INVD|: Invalidate Internal Caches
2984
+
2985
+ # INVD ; 0F 08 [486]
2986
+
2987
+ # |INVD| invalidates and empties the processor's internal caches, and
2988
+ # causes the processor to instruct external caches to do the same. It does
2989
+ # not write the contents of the caches back to memory first: any modified
2990
+ # data held in the caches will be lost. To write the data back first, use
2991
+ # |WBINVD| (section A.164 <#section-A.164>).
2992
+
2993
+
2994
+ # A.85 |INVLPG|: Invalidate TLB Entry
2995
+
2996
+ # INVLPG mem ; 0F 01 /7 [486]
2997
+
2998
+ # |INVLPG| invalidates the translation lookaside buffer (TLB) entry
2999
+ # associated with the supplied memory address.
3000
+
3001
+
3002
+ # A.86 |IRET|, |IRETW|, |IRETD|: Return from Interrupt
3003
+
3004
+ # IRET ; CF [8086]
3005
+ # IRETW ; o16 CF [8086]
3006
+ # IRETD ; o32 CF [386]
3007
+
3008
+ # |IRET| returns from an interrupt (hardware or software) by means of
3009
+ # popping |IP| (or |EIP|), |CS| and the flags off the stack and then
3010
+ # continuing execution from the new |CS:IP|.
3011
+
3012
+ # |IRETW| pops |IP|, |CS| and the flags as 2 bytes each, taking 6 bytes
3013
+ # off the stack in total. |IRETD| pops |EIP| as 4 bytes, pops a further 4
3014
+ # bytes of which the top two are discarded and the bottom two go into
3015
+ # |CS|, and pops the flags as 4 bytes as well, taking 12 bytes off the stack.
3016
+
3017
+ # |IRET| is a shorthand for either |IRETW| or |IRETD|, depending on the
3018
+ # default |BITS| setting at the time.
3019
+
3020
+
3021
+ # A.87 |JCXZ|, |JECXZ|: Jump if CX/ECX Zero
3022
+
3023
+ # JCXZ imm ; o16 E3 rb [8086]
3024
+ # JECXZ imm ; o32 E3 rb [386]
3025
+
3026
+ # |JCXZ| performs a short jump (with maximum range 128 bytes) if and only
3027
+ # if the contents of the |CX| register is 0. |JECXZ| does the same thing,
3028
+ # but with |ECX|.
3029
+
3030
+
3031
+ # A.88 |JMP|: Jump
3032
+
3033
+ # JMP imm ; E9 rw/rd [8086]
3034
+
3035
+ # JMP SHORT imm ; EB rb [8086]
3036
+ # JMP imm:imm16 ; o16 EA iw iw [8086]
3037
+ # JMP imm:imm32 ; o32 EA id iw [386]
3038
+
3039
+ # JMP FAR mem ; o16 FF /5 [8086]
3040
+ # JMP FAR mem ; o32 FF /5 [386]
3041
+ # JMP r/m16 ; o16 FF /4 [8086]
3042
+ # JMP r/m32 ; o32 FF /4 [386]
3043
+
3044
+ # |JMP| jumps to a given address. The address may be specified as an
3045
+ # absolute segment and offset, or as a relative jump within the current
3046
+ # segment.
3047
+
3048
+ # |JMP SHORT imm| has a maximum range of 128 bytes, since the displacement
3049
+ # is specified as only 8 bits, but takes up less code space. NASM does not
3050
+ # choose when to generate |JMP SHORT| for you: you must explicitly code
3051
+ # |SHORT| every time you want a short jump.
3052
+
3053
+ # You can choose between the two immediate far jump forms (|JMP imm:imm|)
3054
+ # by the use of the |WORD| and |DWORD| keywords: |JMP WORD 0x1234:0x5678|
3055
+ # or |JMP DWORD 0x1234:0x56789abc|.
3056
+
3057
+ # The |JMP FAR mem| forms execute a far jump by loading the destination
3058
+ # address out of memory. The address loaded consists of 16 or 32 bits of
3059
+ # offset (depending on the operand size), and 16 bits of segment. The
3060
+ # operand size may be overridden using |JMP WORD FAR mem| or |JMP DWORD
3061
+ # FAR mem|.
3062
+
3063
+ # The |JMP r/m| forms execute a near jump (within the same segment),
3064
+ # loading the destination address out of memory or out of a register. The
3065
+ # keyword |NEAR| may be specified, for clarity, in these forms, but is not
3066
+ # necessary. Again, operand size can be overridden using |JMP WORD mem| or
3067
+ # |JMP DWORD mem|.
3068
+
3069
+ # As a convenience, NASM does not require you to jump to a far symbol by
3070
+ # coding the cumbersome |JMP SEG routine:routine|, but instead allows the
3071
+ # easier synonym |JMP FAR routine|.
3072
+
3073
+ # The |JMP r/m| forms given above are near jumps; NASM will accept the
3074
+ # |NEAR| keyword (e.g. |JMP NEAR [address]|), even though it is not
3075
+ # strictly necessary.
3076
+
3077
+
3078
+ # A.89 |Jcc|: Conditional Branch
3079
+
3080
+ # Jcc imm ; 70+cc rb [8086]
3081
+ # Jcc NEAR imm ; 0F 80+cc rw/rd [386]
3082
+
3083
+ # The conditional jump instructions execute a near (same segment) jump if
3084
+ # and only if their conditions are satisfied. For example, |JNZ| jumps
3085
+ # only if the zero flag is not set.
3086
+
3087
+ # The ordinary form of the instructions has only a 128-byte range; the
3088
+ # |NEAR| form is a 386 extension to the instruction set, and can span the
3089
+ # full size of a segment. NASM will not override your choice of jump
3090
+ # instruction: if you want |Jcc NEAR|, you have to use the |NEAR| keyword.
3091
+
3092
+ # The |SHORT| keyword is allowed on the first form of the instruction, for
3093
+ # clarity, but is not necessary.
3094
+
3095
+
3096
+ # A.90 |LAHF|: Load AH from Flags
3097
+
3098
+ # LAHF ; 9F [8086]
3099
+
3100
+ # |LAHF| sets the |AH| register according to the contents of the low byte
3101
+ # of the flags word. See also |SAHF| (section A.145 <#section-A.145>).
3102
+
3103
+
3104
+ # A.91 |LAR|: Load Access Rights
3105
+
3106
+ # LAR reg16,r/m16 ; o16 0F 02 /r [286,PRIV]
3107
+ # LAR reg32,r/m32 ; o32 0F 02 /r [286,PRIV]
3108
+
3109
+ # |LAR| takes the segment selector specified by its source (second)
3110
+ # operand, finds the corresponding segment descriptor in the GDT or LDT,
3111
+ # and loads the access-rights byte of the descriptor into its destination
3112
+ # (first) operand.
3113
+
3114
+
3115
+ # A.92 |LDS|, |LES|, |LFS|, |LGS|, |LSS|: Load Far Pointer
3116
+
3117
+ # LDS reg16,mem ; o16 C5 /r [8086]
3118
+ # LDS reg32,mem ; o32 C5 /r [8086]
3119
+
3120
+ # LES reg16,mem ; o16 C4 /r [8086]
3121
+ # LES reg32,mem ; o32 C4 /r [8086]
3122
+
3123
+ # LFS reg16,mem ; o16 0F B4 /r [386]
3124
+ # LFS reg32,mem ; o32 0F B4 /r [386]
3125
+
3126
+ # LGS reg16,mem ; o16 0F B5 /r [386]
3127
+ # LGS reg32,mem ; o32 0F B5 /r [386]
3128
+
3129
+ # LSS reg16,mem ; o16 0F B2 /r [386]
3130
+ # LSS reg32,mem ; o32 0F B2 /r [386]
3131
+
3132
+ # These instructions load an entire far pointer (16 or 32 bits of offset,
3133
+ # plus 16 bits of segment) out of memory in one go. |LDS|, for example,
3134
+ # loads 16 or 32 bits from the given memory address into the given
3135
+ # register (depending on the size of the register), then loads the /next/
3136
+ # 16 bits from memory into |DS|. |LES|, |LFS|, |LGS| and |LSS| work in the
3137
+ # same way but use the other segment registers.
3138
+
3139
+
3140
+ # A.93 |LEA|: Load Effective Address
3141
+
3142
+ # LEA reg16,mem ; o16 8D /r [8086]
3143
+ # LEA reg32,mem ; o32 8D /r [8086]
3144
+
3145
+ # |LEA|, despite its syntax, does not access memory. It calculates the
3146
+ # effective address specified by its second operand as if it were going to
3147
+ # load or store data from it, but instead it stores the calculated address
3148
+ # into the register specified by its first operand. This can be used to
3149
+ # perform quite complex calculations (e.g. |LEA EAX,[EBX+ECX*4+100]|) in
3150
+ # one instruction.
3151
+
3152
+ # |LEA|, despite being a purely arithmetic instruction which accesses no
3153
+ # memory, still requires square brackets around its second operand, as if
3154
+ # it were a memory reference.
3155
+
3156
+
3157
+ # A.94 |LEAVE|: Destroy Stack Frame
3158
+
3159
+ # LEAVE ; C9 [186]
3160
+
3161
+ # |LEAVE| destroys a stack frame of the form created by the |ENTER|
3162
+ # instruction (see section A.27 <#section-A.27>). It is functionally
3163
+ # equivalent to |MOV ESP,EBP| followed by |POP EBP| (or |MOV SP,BP|
3164
+ # followed by |POP BP| in 16-bit mode).
3165
+
3166
+
3167
+ # A.95 |LGDT|, |LIDT|, |LLDT|: Load Descriptor Tables
3168
+
3169
+ # LGDT mem ; 0F 01 /2 [286,PRIV]
3170
+ # LIDT mem ; 0F 01 /3 [286,PRIV]
3171
+ # LLDT r/m16 ; 0F 00 /2 [286,PRIV]
3172
+
3173
+ # |LGDT| and |LIDT| both take a 6-byte memory area as an operand: they
3174
+ # load a 32-bit linear address and a 16-bit size limit from that area (in
3175
+ # the opposite order) into the GDTR (global descriptor table register) or
3176
+ # IDTR (interrupt descriptor table register). These are the only
3177
+ # instructions which directly use /linear/ addresses, rather than
3178
+ # segment/offset pairs.
3179
+
3180
+ # |LLDT| takes a segment selector as an operand. The processor looks up
3181
+ # that selector in the GDT and stores the limit and base address given
3182
+ # there into the LDTR (local descriptor table register).
3183
+
3184
+ # See also |SGDT|, |SIDT| and |SLDT| (section A.151 <#section-A.151>).
3185
+
3186
+
3187
+ # A.96 |LMSW|: Load/Store Machine Status Word
3188
+
3189
+ # LMSW r/m16 ; 0F 01 /6 [286,PRIV]
3190
+
3191
+ # |LMSW| loads the bottom four bits of the source operand into the bottom
3192
+ # four bits of the |CR0| control register (or the Machine Status Word, on
3193
+ # 286 processors). See also |SMSW| (section A.155 <#section-A.155>).
3194
+
3195
+
3196
+ # A.97 |LOADALL|, |LOADALL286|: Load Processor State
3197
+
3198
+ # LOADALL ; 0F 07 [386,UNDOC]
3199
+ # LOADALL286 ; 0F 05 [286,UNDOC]
3200
+
3201
+ # This instruction, in its two different-opcode forms, is apparently
3202
+ # supported on most 286 processors, some 386 and possibly some 486. The
3203
+ # opcode differs between the 286 and the 386.
3204
+
3205
+ # The function of the instruction is to load all information relating to
3206
+ # the state of the processor out of a block of memory: on the 286, this
3207
+ # block is located implicitly at absolute address |0x800|, and on the 386
3208
+ # and 486 it is at |[ES:EDI]|.
3209
+
3210
+
3211
+ # A.98 |LODSB|, |LODSW|, |LODSD|: Load from String
3212
+
3213
+ # LODSB ; AC [8086]
3214
+ # LODSW ; o16 AD [8086]
3215
+ # LODSD ; o32 AD [386]
3216
+
3217
+ # |LODSB| loads a byte from |[DS:SI]| or |[DS:ESI]| into |AL|. It then
3218
+ # increments or decrements (depending on the direction flag: increments if
3219
+ # the flag is clear, decrements if it is set) |SI| or |ESI|.
3220
+
3221
+ # The register used is |SI| if the address size is 16 bits, and |ESI| if
3222
+ # it is 32 bits. If you need to use an address size not equal to the
3223
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
3224
+
3225
+ # The segment register used to load from |[SI]| or |[ESI]| can be
3226
+ # overridden by using a segment register name as a prefix (for example,
3227
+ # |es lodsb|).
3228
+
3229
+ # |LODSW| and |LODSD| work in the same way, but they load a word or a
3230
+ # doubleword instead of a byte, and increment or decrement the addressing
3231
+ # registers by 2 or 4 instead of 1.
3232
+
3233
+
3234
+ # A.99 |LOOP|, |LOOPE|, |LOOPZ|, |LOOPNE|, |LOOPNZ|: Loop with Counter
3235
+
3236
+ # LOOP imm ; E2 rb [8086]
3237
+ # LOOP imm,CX ; a16 E2 rb [8086]
3238
+ # LOOP imm,ECX ; a32 E2 rb [386]
3239
+
3240
+ # LOOPE imm ; E1 rb [8086]
3241
+ # LOOPE imm,CX ; a16 E1 rb [8086]
3242
+ # LOOPE imm,ECX ; a32 E1 rb [386]
3243
+ # LOOPZ imm ; E1 rb [8086]
3244
+ # LOOPZ imm,CX ; a16 E1 rb [8086]
3245
+ # LOOPZ imm,ECX ; a32 E1 rb [386]
3246
+
3247
+ # LOOPNE imm ; E0 rb [8086]
3248
+ # LOOPNE imm,CX ; a16 E0 rb [8086]
3249
+ # LOOPNE imm,ECX ; a32 E0 rb [386]
3250
+ # LOOPNZ imm ; E0 rb [8086]
3251
+ # LOOPNZ imm,CX ; a16 E0 rb [8086]
3252
+ # LOOPNZ imm,ECX ; a32 E0 rb [386]
3253
+
3254
+ # |LOOP| decrements its counter register (either |CX| or |ECX| - if one is
3255
+ # not specified explicitly, the |BITS| setting dictates which is used) by
3256
+ # one, and if the counter does not become zero as a result of this
3257
+ # operation, it jumps to the given label. The jump has a range of 128 bytes.
3258
+
3259
+ # |LOOPE| (or its synonym |LOOPZ|) adds the additional condition that it
3260
+ # only jumps if the counter is nonzero /and/ the zero flag is set.
3261
+ # Similarly, |LOOPNE| (and |LOOPNZ|) jumps only if the counter is nonzero
3262
+ # and the zero flag is clear.
3263
+
3264
+
3265
+ # A.100 |LSL|: Load Segment Limit
3266
+
3267
+ # LSL reg16,r/m16 ; o16 0F 03 /r [286,PRIV]
3268
+ # LSL reg32,r/m32 ; o32 0F 03 /r [286,PRIV]
3269
+
3270
+ # |LSL| is given a segment selector in its source (second) operand; it
3271
+ # computes the segment limit value by loading the segment limit field from
3272
+ # the associated segment descriptor in the GDT or LDT. (This involves
3273
+ # shifting left by 12 bits if the segment limit is page-granular, and not
3274
+ # if it is byte-granular; so you end up with a byte limit in either case.)
3275
+ # The segment limit obtained is then loaded into the destination (first)
3276
+ # operand.
3277
+
3278
+
3279
+ # A.101 |LTR|: Load Task Register
3280
+
3281
+ # LTR r/m16 ; 0F 00 /3 [286,PRIV]
3282
+
3283
+ # |LTR| looks up the segment base and limit in the GDT or LDT descriptor
3284
+ # specified by the segment selector given as its operand, and loads them
3285
+ # into the Task Register.
3286
+
3287
+
3288
+ # A.102 |MOV|: Move Data
3289
+
3290
+ # MOV r/m8,reg8 ; 88 /r [8086]
3291
+ # MOV r/m16,reg16 ; o16 89 /r [8086]
3292
+ # MOV r/m32,reg32 ; o32 89 /r [386]
3293
+ # MOV reg8,r/m8 ; 8A /r [8086]
3294
+ # MOV reg16,r/m16 ; o16 8B /r [8086]
3295
+ # MOV reg32,r/m32 ; o32 8B /r [386]
3296
+
3297
+ # MOV reg8,imm8 ; B0+r ib [8086]
3298
+ # MOV reg16,imm16 ; o16 B8+r iw [8086]
3299
+ # MOV reg32,imm32 ; o32 B8+r id [386]
3300
+ # MOV r/m8,imm8 ; C6 /0 ib [8086]
3301
+ # MOV r/m16,imm16 ; o16 C7 /0 iw [8086]
3302
+ # MOV r/m32,imm32 ; o32 C7 /0 id [386]
3303
+
3304
+ # MOV AL,memoffs8 ; A0 ow/od [8086]
3305
+ # MOV AX,memoffs16 ; o16 A1 ow/od [8086]
3306
+ # MOV EAX,memoffs32 ; o32 A1 ow/od [386]
3307
+ # MOV memoffs8,AL ; A2 ow/od [8086]
3308
+ # MOV memoffs16,AX ; o16 A3 ow/od [8086]
3309
+ # MOV memoffs32,EAX ; o32 A3 ow/od [386]
3310
+
3311
+ # MOV r/m16,segreg ; o16 8C /r [8086]
3312
+ # MOV r/m32,segreg ; o32 8C /r [386]
3313
+ # MOV segreg,r/m16 ; o16 8E /r [8086]
3314
+ # MOV segreg,r/m32 ; o32 8E /r [386]
3315
+
3316
+ # MOV reg32,CR0/2/3/4 ; 0F 20 /r [386]
3317
+ # MOV reg32,DR0/1/2/3/6/7 ; 0F 21 /r [386]
3318
+ # MOV reg32,TR3/4/5/6/7 ; 0F 24 /r [386]
3319
+ # MOV CR0/2/3/4,reg32 ; 0F 22 /r [386]
3320
+ # MOV DR0/1/2/3/6/7,reg32 ; 0F 23 /r [386]
3321
+ # MOV TR3/4/5/6/7,reg32 ; 0F 26 /r [386]
3322
+
3323
+ # |MOV| copies the contents of its source (second) operand into its
3324
+ # destination (first) operand.
3325
+
3326
+ # In all forms of the |MOV| instruction, the two operands are the same
3327
+ # size, except for moving between a segment register and an |r/m32|
3328
+ # operand. These instructions are treated exactly like the corresponding
3329
+ # 16-bit equivalent (so that, for example, |MOV DS,EAX| functions
3330
+ # identically to |MOV DS,AX| but saves a prefix when in 32-bit mode),
3331
+ # except that when a segment register is moved into a 32-bit destination,
3332
+ # the top two bytes of the result are undefined.
3333
+
3334
+ # |MOV| may not use |CS| as a destination.
3335
+
3336
+ # |CR4| is only a supported register on the Pentium and above.
3337
+
3338
+
3339
+ # A.103 |MOVD|: Move Doubleword to/from MMX Register
3340
+
3341
+ # MOVD mmxreg,r/m32 ; 0F 6E /r [PENT,MMX]
3342
+ # MOVD r/m32,mmxreg ; 0F 7E /r [PENT,MMX]
3343
+
3344
+ # |MOVD| copies 32 bits from its source (second) operand into its
3345
+ # destination (first) operand. When the destination is a 64-bit MMX
3346
+ # register, the top 32 bits are set to zero.
3347
+
3348
+
3349
+ # A.104 |MOVQ|: Move Quadword to/from MMX Register
3350
+
3351
+ # MOVQ mmxreg,r/m64 ; 0F 6F /r [PENT,MMX]
3352
+ # MOVQ r/m64,mmxreg ; 0F 7F /r [PENT,MMX]
3353
+
3354
+ # |MOVQ| copies 64 bits from its source (second) operand into its
3355
+ # destination (first) operand.
3356
+
3357
+
3358
+ # A.105 |MOVSB|, |MOVSW|, |MOVSD|: Move String
3359
+
3360
+ # MOVSB ; A4 [8086]
3361
+ # MOVSW ; o16 A5 [8086]
3362
+ # MOVSD ; o32 A5 [386]
3363
+
3364
+ # |MOVSB| copies the byte at |[DS:SI]| or |[DS:ESI]| to |[ES:DI]| or
3364
+ # |[ES:EDI]|. It then increments or decrements (depending on the direction
3366
+ # flag: increments if the flag is clear, decrements if it is set) |SI| and
3367
+ # |DI| (or |ESI| and |EDI|).
3368
+
3369
+ # The registers used are |SI| and |DI| if the address size is 16 bits, and
3370
+ # |ESI| and |EDI| if it is 32 bits. If you need to use an address size not
3371
+ # equal to the current |BITS| setting, you can use an explicit |a16| or
3372
+ # |a32| prefix.
3373
+
3374
+ # The segment register used to load from |[SI]| or |[ESI]| can be
3375
+ # overridden by using a segment register name as a prefix (for example,
3376
+ # |es movsb|). The use of |ES| for the store to |[DI]| or |[EDI]| cannot
3377
+ # be overridden.
3378
+
3379
+ # |MOVSW| and |MOVSD| work in the same way, but they copy a word or a
3380
+ # doubleword instead of a byte, and increment or decrement the addressing
3381
+ # registers by 2 or 4 instead of 1.
3382
+
3383
+ # The |REP| prefix may be used to repeat the instruction |CX| (or |ECX| -
3384
+ # again, the address size chooses which) times.
3385
+
3386
+
3387
+ # A.106 |MOVSX|, |MOVZX|: Move Data with Sign or Zero Extend
3388
+
3389
+ # MOVSX reg16,r/m8 ; o16 0F BE /r [386]
3390
+ # MOVSX reg32,r/m8 ; o32 0F BE /r [386]
3391
+ # MOVSX reg32,r/m16 ; o32 0F BF /r [386]
3392
+
3393
+ # MOVZX reg16,r/m8 ; o16 0F B6 /r [386]
3394
+ # MOVZX reg32,r/m8 ; o32 0F B6 /r [386]
3395
+ # MOVZX reg32,r/m16 ; o32 0F B7 /r [386]
3396
+
3397
+ # |MOVSX| sign-extends its source (second) operand to the length of its
3398
+ # destination (first) operand, and copies the result into the destination
3399
+ # operand. |MOVZX| does the same, but zero-extends rather than
3400
+ # sign-extending.
3401
+
3402
+
3403
+ # A.107 |MUL|: Unsigned Integer Multiply
3404
+
3405
+ # MUL r/m8 ; F6 /4 [8086]
3406
+ # MUL r/m16 ; o16 F7 /4 [8086]
3407
+ # MUL r/m32 ; o32 F7 /4 [386]
3408
+
3409
+ # |MUL| performs unsigned integer multiplication. The other operand to the
3410
+ # multiplication, and the destination operand, are implicit, in the
3411
+ # following way:
3412
+
3413
+ # * For |MUL r/m8|, |AL| is multiplied by the given operand; the
3414
+ # product is stored in |AX|.
3415
+ # * For |MUL r/m16|, |AX| is multiplied by the given operand; the
3416
+ # product is stored in |DX:AX|.
3417
+ # * For |MUL r/m32|, |EAX| is multiplied by the given operand; the
3418
+ # product is stored in |EDX:EAX|.
3419
+
3420
+ # Signed integer multiplication is performed by the |IMUL| instruction:
3421
+ # see section A.77 <#section-A.77>.
3422
+
3423
+
3424
+ # A.108 |NEG|, |NOT|: Two's and One's Complement
3425
+
3426
+ # NEG r/m8 ; F6 /3 [8086]
3427
+ # NEG r/m16 ; o16 F7 /3 [8086]
3428
+ # NEG r/m32 ; o32 F7 /3 [386]
3429
+
3430
+ # NOT r/m8 ; F6 /2 [8086]
3431
+ # NOT r/m16 ; o16 F7 /2 [8086]
3432
+ # NOT r/m32 ; o32 F7 /2 [386]
3433
+
3434
+ # |NEG| replaces the contents of its operand by the two's complement
3435
+ # negation (invert all the bits and then add one) of the original value.
3436
+ # |NOT|, similarly, performs one's complement (inverts all the bits).
3437
+
3438
+
3439
+ # A.109 |NOP|: No Operation
3440
+
3441
+ # NOP ; 90 [8086]
3442
+
3443
+ # |NOP| performs no operation. Its opcode is the same as that generated by
3444
+ # |XCHG AX,AX| or |XCHG EAX,EAX| (depending on the processor mode; see
3445
+ # section A.168 <#section-A.168>).
3446
+
3447
+
3448
+ # A.110 |OR|: Bitwise OR
3449
+
3450
+ # OR r/m8,reg8 ; 08 /r [8086]
3451
+ # OR r/m16,reg16 ; o16 09 /r [8086]
3452
+ # OR r/m32,reg32 ; o32 09 /r [386]
3453
+
3454
+ # OR reg8,r/m8 ; 0A /r [8086]
3455
+ # OR reg16,r/m16 ; o16 0B /r [8086]
3456
+ # OR reg32,r/m32 ; o32 0B /r [386]
3457
+
3458
+ # OR r/m8,imm8 ; 80 /1 ib [8086]
3459
+ # OR r/m16,imm16 ; o16 81 /1 iw [8086]
3460
+ # OR r/m32,imm32 ; o32 81 /1 id [386]
3461
+
3462
+ # OR r/m16,imm8 ; o16 83 /1 ib [8086]
3463
+ # OR r/m32,imm8 ; o32 83 /1 ib [386]
3464
+
3465
+ # OR AL,imm8 ; 0C ib [8086]
3466
+ # OR AX,imm16 ; o16 0D iw [8086]
3467
+ # OR EAX,imm32 ; o32 0D id [386]
3468
+
3469
+ # |OR| performs a bitwise OR operation between its two operands (i.e. each
3470
+ # bit of the result is 1 if and only if at least one of the corresponding
3471
+ # bits of the two inputs was 1), and stores the result in the destination
3472
+ # (first) operand.
3473
+
3474
+ # In the forms with an 8-bit immediate second operand and a longer first
3475
+ # operand, the second operand is considered to be signed, and is
3476
+ # sign-extended to the length of the first operand. In these cases, the
3477
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
3478
+ # instruction.
3479
+
3480
+ # The MMX instruction |POR| (see section A.129 <#section-A.129>) performs
3481
+ # the same operation on the 64-bit MMX registers.
3482
+
3483
+
3484
+ # A.111 |OUT|: Output Data to I/O Port
3485
+
3486
+ # OUT imm8,AL ; E6 ib [8086]
3487
+ # OUT imm8,AX ; o16 E7 ib [8086]
3488
+ # OUT imm8,EAX ; o32 E7 ib [386]
3489
+ # OUT DX,AL ; EE [8086]
3490
+ # OUT DX,AX ; o16 EF [8086]
3491
+ # OUT DX,EAX ; o32 EF [386]
3492
+
3493
+ # |OUT| writes the contents of the given source register to the specified
3494
+ # I/O port. The port number may be specified as an immediate value if it
3495
+ # is between 0 and 255, and otherwise must be stored in |DX|. See also
3496
+ # |IN| (section A.78 <#section-A.78>).
3497
+
3498
+
3499
+ # A.112 |OUTSB|, |OUTSW|, |OUTSD|: Output String to I/O Port
3500
+
3501
+ # OUTSB ; 6E [186]
3502
+
3503
+ # OUTSW ; o16 6F [186]
3504
+
3505
+ # OUTSD ; o32 6F [386]
3506
+
3507
+ # |OUTSB| loads a byte from |[DS:SI]| or |[DS:ESI]| and writes it to the
3508
+ # I/O port specified in |DX|. It then increments or decrements (depending
3509
+ # on the direction flag: increments if the flag is clear, decrements if it
3510
+ # is set) |SI| or |ESI|.
3511
+
3512
+ # The register used is |SI| if the address size is 16 bits, and |ESI| if
3513
+ # it is 32 bits. If you need to use an address size not equal to the
3514
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
3515
+
3516
+ # The segment register used to load from |[SI]| or |[ESI]| can be
3517
+ # overridden by using a segment register name as a prefix (for example,
3518
+ # |es outsb|).
3519
+
3520
+ # |OUTSW| and |OUTSD| work in the same way, but they output a word or a
3521
+ # doubleword instead of a byte, and increment or decrement the addressing
3522
+ # registers by 2 or 4 instead of 1.
3523
+
3524
+ # The |REP| prefix may be used to repeat the instruction |CX| (or |ECX| -
3525
+ # again, the address size chooses which) times.
3526
+
3527
+
3528
+ # A.113 |PACKSSDW|, |PACKSSWB|, |PACKUSWB|: Pack Data
3529
+
3530
+ # PACKSSDW mmxreg,r/m64 ; 0F 6B /r [PENT,MMX]
3531
+ # PACKSSWB mmxreg,r/m64 ; 0F 63 /r [PENT,MMX]
3532
+ # PACKUSWB mmxreg,r/m64 ; 0F 67 /r [PENT,MMX]
3533
+
3534
+ # All these instructions start by forming a notional 128-bit word by
3535
+ # placing the source (second) operand on the left of the destination
3536
+ # (first) operand. |PACKSSDW| then splits this 128-bit word into four
3537
+ # doublewords, converts each to a word, and loads them side by side into
3538
+ # the destination register; |PACKSSWB| and |PACKUSWB| both split the
3539
+ # 128-bit word into eight words, convert each to a byte, and load
3540
+ # /those/ side by side into the destination register.
3541
+
3542
+ # |PACKSSDW| and |PACKSSWB| perform signed saturation when reducing the
3543
+ # length of numbers: if the number is too large to fit into the reduced
3544
+ # space, they replace it by the largest signed number (|7FFFh| or |7Fh|)
3545
+ # that /will/ fit, and if it is too small then they replace it by the
3546
+ # smallest signed number (|8000h| or |80h|) that will fit. |PACKUSWB|
3547
+ # performs unsigned saturation: it treats its input as signed, and clips
3547
+ # it to the range of an unsigned byte (|00h| to |0FFh|).
3549
+
3550
+
3551
+ # A.114 |PADDxx|: MMX Packed Addition
3552
+
3553
+ # PADDB mmxreg,r/m64 ; 0F FC /r [PENT,MMX]
3554
+ # PADDW mmxreg,r/m64 ; 0F FD /r [PENT,MMX]
3555
+ # PADDD mmxreg,r/m64 ; 0F FE /r [PENT,MMX]
3556
+
3557
+ # PADDSB mmxreg,r/m64 ; 0F EC /r [PENT,MMX]
3558
+ # PADDSW mmxreg,r/m64 ; 0F ED /r [PENT,MMX]
3559
+
3560
+ # PADDUSB mmxreg,r/m64 ; 0F DC /r [PENT,MMX]
3561
+ # PADDUSW mmxreg,r/m64 ; 0F DD /r [PENT,MMX]
3562
+
3563
+ # |PADDxx| all perform packed addition between their two 64-bit operands,
3564
+ # storing the result in the destination (first) operand. The |PADDxB|
3565
+ # forms treat the 64-bit operands as vectors of eight bytes, and add each
3566
+ # byte individually; |PADDxW| treat the operands as vectors of four words;
3567
+ # and |PADDD| treats its operands as vectors of two doublewords.
3568
+
3569
+ # |PADDSB| and |PADDSW| perform signed saturation on the sum of each pair
3570
+ # of bytes or words: if the result of an addition is too large or too
3571
+ # small to fit into a signed byte or word result, it is clipped
3572
+ # (saturated) to the largest or smallest value which /will/ fit. |PADDUSB|
3573
+ # and |PADDUSW| similarly perform unsigned saturation, clipping to |0FFh|
3574
+ # or |0FFFFh| if the result is larger than that.
3575
+
3576
+
3577
+ # A.115 |PADDSIW|: MMX Packed Addition to Implicit Destination
3578
+
3579
+ # PADDSIW mmxreg,r/m64 ; 0F 51 /r [CYRIX,MMX]
3580
+
3581
+ # |PADDSIW|, specific to the Cyrix extensions to the MMX instruction set,
3582
+ # performs the same function as |PADDSW|, except that the result is not
3583
+ # placed in the register specified by the first operand, but instead in
3584
+ # the register whose number differs from the first operand only in the
3585
+ # last bit. So |PADDSIW MM0,MM2| would put the result in |MM1|, but
3586
+ # |PADDSIW MM1,MM2| would put the result in |MM0|.
3587
+
3588
+
3589
+ # A.116 |PAND|, |PANDN|: MMX Bitwise AND and AND-NOT
3590
+
3591
+ # PAND mmxreg,r/m64 ; 0F DB /r [PENT,MMX]
3592
+ # PANDN mmxreg,r/m64 ; 0F DF /r [PENT,MMX]
3593
+
3594
+ # |PAND| performs a bitwise AND operation between its two operands (i.e.
3595
+ # each bit of the result is 1 if and only if the corresponding bits of the
3596
+ # two inputs were both 1), and stores the result in the destination
3597
+ # (first) operand.
3598
+
3599
+ # |PANDN| performs the same operation, but performs a one's complement
3600
+ # operation on the destination (first) operand first.
3601
+
3602
+
3603
+ # A.117 |PAVEB|: MMX Packed Average
3604
+
3605
+ # PAVEB mmxreg,r/m64 ; 0F 50 /r [CYRIX,MMX]
3606
+
3607
+ # |PAVEB|, specific to the Cyrix MMX extensions, treats its two operands
3608
+ # as vectors of eight unsigned bytes, and calculates the average of the
3609
+ # corresponding bytes in the operands. The resulting vector of eight
3610
+ # averages is stored in the first operand.
3611
+
3612
+
3613
+ # A.118 |PCMPxx|: MMX Packed Comparison
3614
+
3615
+ # PCMPEQB mmxreg,r/m64 ; 0F 74 /r [PENT,MMX]
3616
+ # PCMPEQW mmxreg,r/m64 ; 0F 75 /r [PENT,MMX]
3617
+ # PCMPEQD mmxreg,r/m64 ; 0F 76 /r [PENT,MMX]
3618
+
3619
+ # PCMPGTB mmxreg,r/m64 ; 0F 64 /r [PENT,MMX]
3620
+ # PCMPGTW mmxreg,r/m64 ; 0F 65 /r [PENT,MMX]
3621
+ # PCMPGTD mmxreg,r/m64 ; 0F 66 /r [PENT,MMX]
3622
+
3623
+ # The |PCMPxx| instructions all treat their operands as vectors of bytes,
3624
+ # words, or doublewords; corresponding elements of the source and
3625
+ # destination are compared, and the corresponding element of the
3626
+ # destination (first) operand is set to all zeros or all ones depending on
3627
+ # the result of the comparison.
3628
+
3629
+ # |PCMPxxB| treats the operands as vectors of eight bytes, |PCMPxxW|
3630
+ # treats them as vectors of four words, and |PCMPxxD| as two doublewords.
3631
+
3632
+ # |PCMPEQx| sets the corresponding element of the destination operand to
3633
+ # all ones if the two elements compared are equal; |PCMPGTx| sets the
3634
+ # destination element to all ones if the element of the first
3635
+ # (destination) operand is greater (treated as a signed integer) than that
3636
+ # of the second (source) operand.
3637
+
3638
+
3639
+ # A.119 |PDISTIB|: MMX Packed Distance and Accumulate with Implied
3640
+ # Register
3641
+
3642
+ # PDISTIB mmxreg,mem64 ; 0F 54 /r [CYRIX,MMX]
3643
+
3644
+ # |PDISTIB|, specific to the Cyrix MMX extensions, treats its two input
3645
+ # operands as vectors of eight unsigned bytes. For each byte position, it
3646
+ # finds the absolute difference between the bytes in that position in the
3647
+ # two input operands, and adds that value to the byte in the same position
3648
+ # in the implied output register. The addition is saturated to an unsigned
3649
+ # byte in the same way as |PADDUSB|.
3650
+
3651
+ # The implied output register is found in the same way as |PADDSIW|
3652
+ # (section A.115 <#section-A.115>).
3653
+
3654
+ # Note that |PDISTIB| cannot take a register as its second source operand.
3655
+
3656
+
3657
+ # A.120 |PMACHRIW|: MMX Packed Multiply and Accumulate with Rounding
3658
+
3659
+ # PMACHRIW mmxreg,mem64 ; 0F 5E /r [CYRIX,MMX]
3660
+
3661
+ # |PMACHRIW| acts almost identically to |PMULHRIW| (section A.123
3662
+ # <#section-A.123>), but instead of /storing/ its result in the implied
3663
+ # destination register, it /adds/ its result, as four packed words, to the
3664
+ # implied destination register. No saturation is done: the addition can
3665
+ # wrap around.
3666
+
3667
+ # Note that |PMACHRIW| cannot take a register as its second source operand.
3668
+
3669
+
3670
+ # A.121 |PMADDWD|: MMX Packed Multiply and Add
3671
+
3672
+ # PMADDWD mmxreg,r/m64 ; 0F F5 /r [PENT,MMX]
3673
+
3674
+ # |PMADDWD| treats its two inputs as vectors of four signed words. It
3675
+ # multiplies corresponding elements of the two operands, giving four
3676
+ # signed doubleword results. The top two of these are added and placed in
3677
+ # the top 32 bits of the destination (first) operand; the bottom two are
3678
+ # added and placed in the bottom 32 bits.
3679
+
3680
+
3681
+ # A.122 |PMAGW|: MMX Packed Magnitude
3682
+
3683
+ # PMAGW mmxreg,r/m64 ; 0F 52 /r [CYRIX,MMX]
3684
+
3685
+ # |PMAGW|, specific to the Cyrix MMX extensions, treats both its operands
3686
+ # as vectors of four signed words. It compares the absolute values of the
3687
+ # words in corresponding positions, and sets each word of the destination
3688
+ # (first) operand to whichever of the two words in that position had the
3689
+ # larger absolute value.
3690
+
3691
+
3692
+ # A.123 |PMULHRW|, |PMULHRIW|: MMX Packed Multiply High with Rounding
3693
+
3694
+ # PMULHRW mmxreg,r/m64 ; 0F 59 /r [CYRIX,MMX]
3695
+ # PMULHRIW mmxreg,r/m64 ; 0F 5D /r [CYRIX,MMX]
3696
+
3697
+ # These instructions, specific to the Cyrix MMX extensions, treat their
3698
+ # operands as vectors of four signed words. Words in corresponding
3699
+ # positions are multiplied, to give a 32-bit value in which bits 30 and 31
3700
+ # are guaranteed equal. Bits 30 to 15 of this value (bit mask
3701
+ # |0x7FFF8000|) are taken and stored in the corresponding position of the
3702
+ # destination operand, after first rounding the low bit (equivalent to
3703
+ # adding |0x4000| before extracting bits 30 to 15).
3704
+
3705
+ # For |PMULHRW|, the destination operand is the first operand; for
3706
+ # |PMULHRIW| the destination operand is implied by the first operand in
3707
+ # the manner of |PADDSIW| (section A.115 <#section-A.115>).
3708
+
3709
+
3710
+ # A.124 |PMULHW|, |PMULLW|: MMX Packed Multiply
3711
+
3712
+ # PMULHW mmxreg,r/m64 ; 0F E5 /r [PENT,MMX]
3713
+ # PMULLW mmxreg,r/m64 ; 0F D5 /r [PENT,MMX]
3714
+
3715
+ # |PMULxW| treats its two inputs as vectors of four signed words. It
3716
+ # multiplies corresponding elements of the two operands, giving four
3717
+ # signed doubleword results.
3718
+
3719
+ # |PMULHW| then stores the top 16 bits of each doubleword in the
3720
+ # destination (first) operand; |PMULLW| stores the bottom 16 bits of each
3721
+ # doubleword in the destination operand.
3722
+
3723
+
3724
+ # A.125 |PMVccZB|: MMX Packed Conditional Move
3725
+
3726
+ # PMVZB mmxreg,mem64 ; 0F 58 /r [CYRIX,MMX]
3727
+ # PMVNZB mmxreg,mem64 ; 0F 5A /r [CYRIX,MMX]
3728
+ # PMVLZB mmxreg,mem64 ; 0F 5B /r [CYRIX,MMX]
3729
+ # PMVGEZB mmxreg,mem64 ; 0F 5C /r [CYRIX,MMX]
3730
+
3731
+ # These instructions, specific to the Cyrix MMX extensions, perform
3732
+ # parallel conditional moves. The two input operands are treated as
3733
+ # vectors of eight bytes. Each byte of the destination (first) operand is
3734
+ # either written from the corresponding byte of the source (second)
3735
+ # operand, or left alone, depending on the value of the byte in the
3736
+ # /implied/ operand (specified in the same way as |PADDSIW|, in section
3737
+ # A.115 <#section-A.115>).
3738
+
3739
+ # |PMVZB| performs each move if the corresponding byte in the implied
3740
+ # operand is zero. |PMVNZB| moves if the byte is non-zero. |PMVLZB| moves
3741
+ # if the byte is less than zero, and |PMVGEZB| moves if the byte is
3742
+ # greater than or equal to zero.
3743
+
3744
+ # Note that these instructions cannot take a register as their second
3745
+ # source operand.
3746
+
3747
+
3748
+ # A.126 |POP|: Pop Data from Stack
3749
+
3750
+ # POP reg16 ; o16 58+r [8086]
3751
+ # POP reg32 ; o32 58+r [386]
3752
+
3753
+ # POP r/m16 ; o16 8F /0 [8086]
3754
+ # POP r/m32 ; o32 8F /0 [386]
3755
+
3756
+ # POP CS ; 0F [8086,UNDOC]
3757
+ # POP DS ; 1F [8086]
3758
+ # POP ES ; 07 [8086]
3759
+ # POP SS ; 17 [8086]
3760
+ # POP FS ; 0F A1 [386]
3761
+ # POP GS ; 0F A9 [386]
3762
+
3763
+ # |POP| loads a value from the stack (from |[SS:SP]| or |[SS:ESP]|) and
3764
+ # then increments the stack pointer.
3765
+
3766
+ # The address-size attribute of the instruction determines whether |SP| or
3767
+ # |ESP| is used as the stack pointer: to deliberately override the default
3768
+ # given by the |BITS| setting, you can use an |a16| or |a32| prefix.
3769
+
3770
+ # The operand-size attribute of the instruction determines whether the
3771
+ # stack pointer is incremented by 2 or 4: this means that segment register
3772
+ # pops in |BITS 32| mode will pop 4 bytes off the stack and discard the
3773
+ # upper two of them. If you need to override that, you can use an |o16| or
3774
+ # |o32| prefix.
3775
+
3776
+ # The above opcode listings give two forms for general-purpose register
3777
+ # pop instructions: for example, |POP BX| has the two forms |5B| and |8F
3778
+ # C3|. NASM will always generate the shorter form when given |POP BX|.
3779
+ # NDISASM will disassemble both.
3780
+
3781
+ # |POP CS| is not a documented instruction, and is not supported on any
3782
+ # processor above the 8086 (since they use |0Fh| as an opcode prefix for
3783
+ # instruction set extensions). However, at least some 8086 processors do
3784
+ # support it, and so NASM generates it for completeness.
3785
+
3786
+
3787
+ # A.127 |POPAx|: Pop All General-Purpose Registers
3788
+
3789
+ # POPA ; 61 [186]
3790
+ # POPAW ; o16 61 [186]
3791
+ # POPAD ; o32 61 [386]
3792
+
3793
+ # |POPAW| pops a word from the stack into each of, successively, |DI|,
3794
+ # |SI|, |BP|, nothing (it discards a word from the stack which was a
3795
+ # placeholder for |SP|), |BX|, |DX|, |CX| and |AX|. It is intended to
3796
+ # reverse the operation of |PUSHAW| (see section A.135 <#section-A.135>),
3797
+ # but it ignores the value for |SP| that was pushed on the stack by |PUSHAW|.
3798
+
3799
+ # |POPAD| pops twice as much data, and places the results in |EDI|, |ESI|,
3800
+ # |EBP|, nothing (placeholder for |ESP|), |EBX|, |EDX|, |ECX| and |EAX|.
3801
+ # It reverses the operation of |PUSHAD|.
3802
+
3803
+ # |POPA| is an alias mnemonic for either |POPAW| or |POPAD|, depending on
3804
+ # the current |BITS| setting.
3805
+
3806
+ # Note that the registers are popped in reverse order of their numeric
3807
+ # values in opcodes (see section A.2.1 <#section-A.2.1>).
3808
+
3809
+
3810
+ # A.128 |POPFx|: Pop Flags Register
3811
+
3812
+ # POPF ; 9D [186]
3813
+ # POPFW ; o16 9D [186]
3814
+ # POPFD ; o32 9D [386]
3815
+
3816
+ # |POPFW| pops a word from the stack and stores it in the bottom 16 bits
3817
+ # of the flags register (or the whole flags register, on processors below
3818
+ # a 386). |POPFD| pops a doubleword and stores it in the entire flags
3819
+ # register.
3820
+
3821
+ # |POPF| is an alias mnemonic for either |POPFW| or |POPFD|, depending on
3822
+ # the current |BITS| setting.
3823
+
3824
+ # See also |PUSHF| (section A.136 <#section-A.136>).
3825
+
3826
+
3827
+ # A.129 |POR|: MMX Bitwise OR
3828
+
3829
+ # POR mmxreg,r/m64 ; 0F EB /r [PENT,MMX]
3830
+
3831
+ # |POR| performs a bitwise OR operation between its two operands (i.e.
3832
+ # each bit of the result is 1 if and only if at least one of the
3833
+ # corresponding bits of the two inputs was 1), and stores the result in
3834
+ # the destination (first) operand.
3835
+
3836
+
3837
+ # A.130 |PSLLx|, |PSRLx|, |PSRAx|: MMX Bit Shifts
3838
+
3839
+ # PSLLW mmxreg,r/m64 ; 0F F1 /r [PENT,MMX]
3840
+ # PSLLW mmxreg,imm8 ; 0F 71 /6 ib [PENT,MMX]
3841
+
3842
+ # PSLLD mmxreg,r/m64 ; 0F F2 /r [PENT,MMX]
3843
+ # PSLLD mmxreg,imm8 ; 0F 72 /6 ib [PENT,MMX]
3844
+
3845
+ # PSLLQ mmxreg,r/m64 ; 0F F3 /r [PENT,MMX]
3846
+ # PSLLQ mmxreg,imm8 ; 0F 73 /6 ib [PENT,MMX]
3847
+
3848
+ # PSRAW mmxreg,r/m64 ; 0F E1 /r [PENT,MMX]
3849
+ # PSRAW mmxreg,imm8 ; 0F 71 /4 ib [PENT,MMX]
3850
+
3851
+ # PSRAD mmxreg,r/m64 ; 0F E2 /r [PENT,MMX]
3852
+ # PSRAD mmxreg,imm8 ; 0F 72 /4 ib [PENT,MMX]
3853
+
3854
+ # PSRLW mmxreg,r/m64 ; 0F D1 /r [PENT,MMX]
3855
+ # PSRLW mmxreg,imm8 ; 0F 71 /2 ib [PENT,MMX]
3856
+
3857
+ # PSRLD mmxreg,r/m64 ; 0F D2 /r [PENT,MMX]
3858
+ # PSRLD mmxreg,imm8 ; 0F 72 /2 ib [PENT,MMX]
3859
+
3860
+ # PSRLQ mmxreg,r/m64 ; 0F D3 /r [PENT,MMX]
3861
+ # PSRLQ mmxreg,imm8 ; 0F 73 /2 ib [PENT,MMX]
3862
+
3863
+ # |PSxxQ| perform simple bit shifts on the 64-bit MMX registers: the
3864
+ # destination (first) operand is shifted left or right by the number of
3865
+ # bits given in the source (second) operand, and the vacated bits are
3866
+ # filled in with zeros (for a logical shift) or copies of the original
3867
+ # sign bit (for an arithmetic right shift).
3868
+
3869
+ # |PSxxW| and |PSxxD| perform packed bit shifts: the destination operand
3870
+ # is treated as a vector of four words or two doublewords, and each
3871
+ # element is shifted individually, so bits shifted out of one element do
3872
+ # not interfere with empty bits coming into the next.
3873
+
3874
+ # |PSLLx| and |PSRLx| perform logical shifts: the vacated bits at one end
3875
+ # of the shifted number are filled with zeros. |PSRAx| performs an
3876
+ # arithmetic right shift: the vacated bits at the top of the shifted
3877
+ # number are filled with copies of the original top (sign) bit.
3878
+
3879
+
3880
+ # A.131 |PSUBxx|: MMX Packed Subtraction
3881
+
3882
+ # PSUBB mmxreg,r/m64 ; 0F F8 /r [PENT,MMX]
3883
+ # PSUBW mmxreg,r/m64 ; 0F F9 /r [PENT,MMX]
3884
+ # PSUBD mmxreg,r/m64 ; 0F FA /r [PENT,MMX]
3885
+
3886
+ # PSUBSB mmxreg,r/m64 ; 0F E8 /r [PENT,MMX]
3887
+ # PSUBSW mmxreg,r/m64 ; 0F E9 /r [PENT,MMX]
3888
+
3889
+ # PSUBUSB mmxreg,r/m64 ; 0F D8 /r [PENT,MMX]
3890
+ # PSUBUSW mmxreg,r/m64 ; 0F D9 /r [PENT,MMX]
3891
+
3892
+ # |PSUBxx| all perform packed subtraction between their two 64-bit
3893
+ # operands, storing the result in the destination (first) operand. The
3894
+ # |PSUBxB| forms treat the 64-bit operands as vectors of eight bytes, and
3895
+ # subtract each byte individually; |PSUBxW| treat the operands as vectors
3896
+ # of four words; and |PSUBD| treats its operands as vectors of two
3897
+ # doublewords.
3898
+
3899
+ # In all cases, the elements of the operand on the right are subtracted
3900
+ # from the corresponding elements of the operand on the left, not the
3901
+ # other way round.
3902
+
3903
+ # |PSUBSB| and |PSUBSW| perform signed saturation on the difference of each pair
3904
+ # of bytes or words: if the result of a subtraction is too large or too
3905
+ # small to fit into a signed byte or word result, it is clipped
3906
+ # (saturated) to the largest or smallest value which /will/ fit. |PSUBUSB|
3907
+ # and |PSUBUSW| similarly perform unsigned saturation, clipping to zero
3907
+ # if the result would otherwise be negative.
3909
+
3910
+
3911
+ # A.132 |PSUBSIW|: MMX Packed Subtract with Saturation to Implied
3912
+ # Destination
3913
+
3914
+ # PSUBSIW mmxreg,r/m64 ; 0F 55 /r [CYRIX,MMX]
3915
+
3916
+ # |PSUBSIW|, specific to the Cyrix extensions to the MMX instruction set,
3917
+ # performs the same function as |PSUBSW|, except that the result is not
3918
+ # placed in the register specified by the first operand, but instead in
3919
+ # the implied destination register, specified as for |PADDSIW| (section
3920
+ # A.115 <#section-A.115>).
3921
+
3922
+
3923
+ # A.133 |PUNPCKxxx|: Unpack Data
3924
+
3925
+ # PUNPCKHBW mmxreg,r/m64 ; 0F 68 /r [PENT,MMX]
3926
+ # PUNPCKHWD mmxreg,r/m64 ; 0F 69 /r [PENT,MMX]
3927
+ # PUNPCKHDQ mmxreg,r/m64 ; 0F 6A /r [PENT,MMX]
3928
+
3929
+ # PUNPCKLBW mmxreg,r/m64 ; 0F 60 /r [PENT,MMX]
3930
+ # PUNPCKLWD mmxreg,r/m64 ; 0F 61 /r [PENT,MMX]
3931
+ # PUNPCKLDQ mmxreg,r/m64 ; 0F 62 /r [PENT,MMX]
3932
+
3933
+ # |PUNPCKxx| all treat their operands as vectors, and produce a new vector
3934
+ # generated by interleaving elements from the two inputs. The |PUNPCKHxx|
3935
+ # instructions start by throwing away the bottom half of each input
3936
+ # operand, and the |PUNPCKLxx| instructions throw away the top half.
3937
+
3938
+ # The remaining elements, totalling 64 bits, are then interleaved into the
3939
+ # destination, alternating elements from the second (source) operand and
3940
+ # the first (destination) operand: so the leftmost element in the result
3941
+ # always comes from the second operand, and the rightmost from the
3942
+ # destination.
3943
+
3944
+ # |PUNPCKxBW| works a byte at a time, |PUNPCKxWD| a word at a time, and
3945
+ # |PUNPCKxDQ| a doubleword at a time.
3946
+
3947
+ # So, for example, if the first operand held |0x7A6A5A4A3A2A1A0A| and the
3948
+ # second held |0x7B6B5B4B3B2B1B0B|, then:
3949
+
3950
+ # * |PUNPCKHBW| would return |0x7B7A6B6A5B5A4B4A|.
3951
+ # * |PUNPCKHWD| would return |0x7B6B7A6A5B4B5A4A|.
3952
+ # * |PUNPCKHDQ| would return |0x7B6B5B4B7A6A5A4A|.
3953
+ # * |PUNPCKLBW| would return |0x3B3A2B2A1B1A0B0A|.
3954
+ # * |PUNPCKLWD| would return |0x3B2B3A2A1B0B1A0A|.
3955
+ # * |PUNPCKLDQ| would return |0x3B2B1B0B3A2A1A0A|.
3956
+
3957
+
3958
+ # A.134 |PUSH|: Push Data on Stack
3959
+
3960
+ # PUSH reg16 ; o16 50+r [8086]
3961
+ # PUSH reg32 ; o32 50+r [386]
3962
+
3963
+ # PUSH r/m16 ; o16 FF /6 [8086]
3964
+ # PUSH r/m32 ; o32 FF /6 [386]
3965
+
3966
+ # PUSH CS ; 0E [8086]
3967
+ # PUSH DS ; 1E [8086]
3968
+ # PUSH ES ; 06 [8086]
3969
+ # PUSH SS ; 16 [8086]
3970
+ # PUSH FS ; 0F A0 [386]
3971
+ # PUSH GS ; 0F A8 [386]
3972
+
3973
+ # PUSH imm8 ; 6A ib [286]
3974
+ # PUSH imm16 ; o16 68 iw [286]
3975
+ # PUSH imm32 ; o32 68 id [386]
3976
+
3977
+ # |PUSH| decrements the stack pointer (|SP| or |ESP|) by 2 or 4, and then
3978
+ # stores the given value at |[SS:SP]| or |[SS:ESP]|.
3979
+
3980
+ # The address-size attribute of the instruction determines whether |SP| or
3981
+ # |ESP| is used as the stack pointer: to deliberately override the default
3982
+ # given by the |BITS| setting, you can use an |a16| or |a32| prefix.
3983
+
3984
+ # The operand-size attribute of the instruction determines whether the
3985
+ # stack pointer is decremented by 2 or 4: this means that segment register
3986
+ # pushes in |BITS 32| mode will push 4 bytes on the stack, of which the
3987
+ # upper two are undefined. If you need to override that, you can use an
3988
+ # |o16| or |o32| prefix.
3989
+
3990
+ # The above opcode listings give two forms for general-purpose register
3991
+ # push instructions: for example, |PUSH BX| has the two forms |53| and |FF
3992
+ # F3|. NASM will always generate the shorter form when given |PUSH BX|.
3993
+ # NDISASM will disassemble both.
3994
+
3995
+ # Unlike the undocumented and barely supported |POP CS|, |PUSH CS| is a
3996
+ # perfectly valid and sensible instruction, supported on all processors.
3997
+
3998
+ # The instruction |PUSH SP| may be used to distinguish an 8086 from later
3999
+ # processors: on an 8086, the value of |SP| stored is the value it has
4000
+ # /after/ the push instruction, whereas on later processors it is the
4001
+ # value /before/ the push instruction.
4002
+
4003
+
4004
+ # A.135 |PUSHAx|: Push All General-Purpose Registers
4005
+
4006
+ # PUSHA ; 60 [186]
4007
+ # PUSHAD ; o32 60 [386]
4008
+ # PUSHAW ; o16 60 [186]
4009
+
4010
+ # |PUSHAW| pushes, in succession, |AX|, |CX|, |DX|, |BX|, |SP|, |BP|, |SI|
4011
+ # and |DI| on the stack, decrementing the stack pointer by a total of 16.
4012
+
4013
+ # |PUSHAD| pushes, in succession, |EAX|, |ECX|, |EDX|, |EBX|, |ESP|,
4014
+ # |EBP|, |ESI| and |EDI| on the stack, decrementing the stack pointer by a
4015
+ # total of 32.
4016
+
4017
+ # In both cases, the value of |SP| or |ESP| pushed is its /original/
4018
+ # value, as it had before the instruction was executed.
4019
+
4020
+ # |PUSHA| is an alias mnemonic for either |PUSHAW| or |PUSHAD|, depending
4021
+ # on the current |BITS| setting.
4022
+
4023
+ # Note that the registers are pushed in order of their numeric values in
4024
+ # opcodes (see section A.2.1 <#section-A.2.1>).
4025
+
4026
+ # See also |POPA| (section A.127 <#section-A.127>).
4027
+
4028
+
4029
+ # A.136 |PUSHFx|: Push Flags Register
4030
+
4031
+ # PUSHF ; 9C [8086]
4032
+ # PUSHFD ; o32 9C [386]
4033
+ # PUSHFW ; o16 9C [8086]
4034
+
4035
+ # |PUSHFW| pushes the bottom 16 bits of the flags register (or the whole
4036
+ # flags register, on processors below a 386) onto the stack, decrementing
4037
+ # the stack pointer by 2. |PUSHFD| pushes the entire 32-bit flags register,
4038
+ # decrementing the stack pointer by 4.
4039
+
4040
+ # |PUSHF| is an alias mnemonic for either |PUSHFW| or |PUSHFD|, depending
4041
+ # on the current |BITS| setting.
4042
+
4043
+ # See also |POPF| (section A.128 <#section-A.128>).
4044
+
4045
+
4046
+ # A.137 |PXOR|: MMX Bitwise XOR
4047
+
4048
+ # PXOR mmxreg,r/m64 ; 0F EF /r [PENT,MMX]
4049
+
4050
+ # |PXOR| performs a bitwise XOR operation between its two operands (i.e.
4051
+ # each bit of the result is 1 if and only if exactly one of the
4052
+ # corresponding bits of the two inputs was 1), and stores the result in
4053
+ # the destination (first) operand.
4054
+
4055
+
4056
+ # A.138 |RCL|, |RCR|: Bitwise Rotate through Carry Bit
4057
+
4058
+ # RCL r/m8,1 ; D0 /2 [8086]
4059
+ # RCL r/m8,CL ; D2 /2 [8086]
4060
+ # RCL r/m8,imm8 ; C0 /2 ib [286]
4061
+ # RCL r/m16,1 ; o16 D1 /2 [8086]
4062
+ # RCL r/m16,CL ; o16 D3 /2 [8086]
4063
+ # RCL r/m16,imm8 ; o16 C1 /2 ib [286]
4064
+ # RCL r/m32,1 ; o32 D1 /2 [386]
4065
+ # RCL r/m32,CL ; o32 D3 /2 [386]
4066
+ # RCL r/m32,imm8 ; o32 C1 /2 ib [386]
4067
+
4068
+ # RCR r/m8,1 ; D0 /3 [8086]
4069
+ # RCR r/m8,CL ; D2 /3 [8086]
4070
+ # RCR r/m8,imm8 ; C0 /3 ib [286]
4071
+ # RCR r/m16,1 ; o16 D1 /3 [8086]
4072
+ # RCR r/m16,CL ; o16 D3 /3 [8086]
4073
+ # RCR r/m16,imm8 ; o16 C1 /3 ib [286]
4074
+ # RCR r/m32,1 ; o32 D1 /3 [386]
4075
+ # RCR r/m32,CL ; o32 D3 /3 [386]
4076
+ # RCR r/m32,imm8 ; o32 C1 /3 ib [386]
4077
+
4078
+ # |RCL| and |RCR| perform a 9-bit, 17-bit or 33-bit bitwise rotation
4079
+ # operation, involving the given source/destination (first) operand and
4080
+ # the carry bit. Thus, for example, in the operation |RCL AL,1|, a 9-bit
4081
+ # rotation is performed in which |AL| is shifted left by 1, the top bit of
4082
+ # |AL| moves into the carry flag, and the original value of the carry flag
4083
+ # is placed in the low bit of |AL|.
4084
+
4085
+ # The number of bits to rotate by is given by the second operand. Only the
4086
+ # bottom five bits of the rotation count are considered by processors
4087
+ # above the 8086.
4088
+
4089
+ # You can force the longer (286 and upwards, beginning with a |C1| byte)
4090
+ # form of |RCL foo,1| by using a |BYTE| prefix: |RCL foo,BYTE 1|.
4091
+ # Similarly with |RCR|.
4092
+
4093
+
4094
+ # A.139 |RDMSR|: Read Model-Specific Registers
4095
+
4096
+ # RDMSR ; 0F 32 [PENT]
4097
+
4098
+ # |RDMSR| reads the processor Model-Specific Register (MSR) whose index is
4099
+ # stored in |ECX|, and stores the result in |EDX:EAX|. See also |WRMSR|
4100
+ # (section A.165 <#section-A.165>).
4101
+
4102
+
4103
+ # A.140 |RDPMC|: Read Performance-Monitoring Counters
4104
+
4105
+ # RDPMC ; 0F 33 [P6]
4106
+
4107
+ # |RDPMC| reads the processor performance-monitoring counter whose index
4108
+ # is stored in |ECX|, and stores the result in |EDX:EAX|.
4109
+
4110
+
4111
+ # A.141 |RDTSC|: Read Time-Stamp Counter
4112
+
4113
+ # RDTSC ; 0F 31 [PENT]
4114
+
4115
+ # |RDTSC| reads the processor's time-stamp counter into |EDX:EAX|.
4116
+
4117
+
4118
+ # A.142 |RET|, |RETF|, |RETN|: Return from Procedure Call
4119
+
4120
+ # RET ; C3 [8086]
4121
+ # RET imm16 ; C2 iw [8086]
4122
+
4123
+ # RETF ; CB [8086]
4124
+ # RETF imm16 ; CA iw [8086]
4125
+
4126
+ # RETN ; C3 [8086]
4127
+ # RETN imm16 ; C2 iw [8086]
4128
+
4129
+ # |RET|, and its exact synonym |RETN|, pop |IP| or |EIP| from the stack
4130
+ # and transfer control to the new address. Optionally, if a numeric
4131
+ # operand is provided, they increment the stack pointer by a further
4132
+ # |imm16| bytes after popping the return address.
4133
+
4134
+ # |RETF| executes a far return: after popping |IP|/|EIP|, it then pops
4135
+ # |CS|, and /then/ increments the stack pointer by the optional argument
4136
+ # if present.
4137
+
4138
+
4139
+ # A.143 |ROL|, |ROR|: Bitwise Rotate
4140
+
4141
+ # ROL r/m8,1 ; D0 /0 [8086]
4142
+ # ROL r/m8,CL ; D2 /0 [8086]
4143
+ # ROL r/m8,imm8 ; C0 /0 ib [286]
4144
+ # ROL r/m16,1 ; o16 D1 /0 [8086]
4145
+ # ROL r/m16,CL ; o16 D3 /0 [8086]
4146
+ # ROL r/m16,imm8 ; o16 C1 /0 ib [286]
4147
+ # ROL r/m32,1 ; o32 D1 /0 [386]
4148
+ # ROL r/m32,CL ; o32 D3 /0 [386]
4149
+ # ROL r/m32,imm8 ; o32 C1 /0 ib [386]
4150
+
4151
+ # ROR r/m8,1 ; D0 /1 [8086]
4152
+ # ROR r/m8,CL ; D2 /1 [8086]
4153
+ # ROR r/m8,imm8 ; C0 /1 ib [286]
4154
+ # ROR r/m16,1 ; o16 D1 /1 [8086]
4155
+ # ROR r/m16,CL ; o16 D3 /1 [8086]
4156
+ # ROR r/m16,imm8 ; o16 C1 /1 ib [286]
4157
+ # ROR r/m32,1 ; o32 D1 /1 [386]
4158
+ # ROR r/m32,CL ; o32 D3 /1 [386]
4159
+ # ROR r/m32,imm8 ; o32 C1 /1 ib [386]
4160
+
4161
+ # |ROL| and |ROR| perform a bitwise rotation operation on the given
4162
+ # source/destination (first) operand. Thus, for example, in the operation
4163
+ # |ROL AL,1|, an 8-bit rotation is performed in which |AL| is shifted left
4164
+ # by 1 and the original top bit of |AL| moves round into the low bit.
4165
+
4166
+ # The number of bits to rotate by is given by the second operand. Only the
4167
+ # bottom 3, 4 or 5 bits (depending on the source operand size) of the
4168
+ # rotation count are considered by processors above the 8086.
4169
+
4170
+ # You can force the longer (286 and upwards, beginning with a |C1| byte)
4171
+ # form of |ROL foo,1| by using a |BYTE| prefix: |ROL foo,BYTE 1|.
4172
+ # Similarly with |ROR|.
4173
+
4174
+
4175
+ # A.144 |RSM|: Resume from System-Management Mode
4176
+
4177
+ # RSM ; 0F AA [PENT]
4178
+
4179
+ # |RSM| returns the processor to its normal operating mode when it was in
4180
+ # System-Management Mode.
4181
+
4182
+
4183
+ # A.145 |SAHF|: Store AH to Flags
4184
+
4185
+ # SAHF ; 9E [8086]
4186
+
4187
+ # |SAHF| sets the low byte of the flags word according to the contents of
4188
+ # the |AH| register. See also |LAHF| (section A.90 <#section-A.90>).
4189
+
4190
+
4191
+ # A.146 |SAL|, |SAR|: Bitwise Arithmetic Shifts
4192
+
4193
+ # SAL r/m8,1 ; D0 /4 [8086]
4194
+ # SAL r/m8,CL ; D2 /4 [8086]
4195
+ # SAL r/m8,imm8 ; C0 /4 ib [286]
4196
+ # SAL r/m16,1 ; o16 D1 /4 [8086]
4197
+ # SAL r/m16,CL ; o16 D3 /4 [8086]
4198
+ # SAL r/m16,imm8 ; o16 C1 /4 ib [286]
4199
+ # SAL r/m32,1 ; o32 D1 /4 [386]
4200
+ # SAL r/m32,CL ; o32 D3 /4 [386]
4201
+ # SAL r/m32,imm8 ; o32 C1 /4 ib [386]
4202
+
4203
+ # SAR r/m8,1 ; D0 /7 [8086]
4204
+ # SAR r/m8,CL ; D2 /7 [8086]
4205
+ # SAR r/m8,imm8 ; C0 /7 ib [286]
4206
+ # SAR r/m16,1 ; o16 D1 /7 [8086]
4207
+ # SAR r/m16,CL ; o16 D3 /7 [8086]
4208
+ # SAR r/m16,imm8 ; o16 C1 /7 ib [286]
4209
+ # SAR r/m32,1 ; o32 D1 /7 [386]
4210
+ # SAR r/m32,CL ; o32 D3 /7 [386]
4211
+ # SAR r/m32,imm8 ; o32 C1 /7 ib [386]
4212
+
4213
+ # |SAL| and |SAR| perform an arithmetic shift operation on the given
4214
+ # source/destination (first) operand. The vacated bits are filled with
4215
+ # zero for |SAL|, and with copies of the original high bit of the source
4216
+ # operand for |SAR|.
4217
+
4218
+ # |SAL| is a synonym for |SHL| (see section A.152 <#section-A.152>). NASM
4219
+ # will assemble either one to the same code, but NDISASM will always
4220
+ # disassemble that code as |SHL|.
4221
+
4222
+ # The number of bits to shift by is given by the second operand. Only the
4223
+ # bottom 3, 4 or 5 bits (depending on the source operand size) of the
4224
+ # shift count are considered by processors above the 8086.
4225
+
4226
+ # You can force the longer (286 and upwards, beginning with a |C1| byte)
4227
+ # form of |SAL foo,1| by using a |BYTE| prefix: |SAL foo,BYTE 1|.
4228
+ # Similarly with |SAR|.
4229
+
4230
+
4231
+ # A.147 |SALC|: Set AL from Carry Flag
4232
+
4233
+ # SALC ; D6 [8086,UNDOC]
4234
+
4235
+ # |SALC| is an early undocumented instruction similar in concept to
4236
+ # |SETcc| (section A.150 <#section-A.150>). Its function is to set |AL| to
4237
+ # zero if the carry flag is clear, or to |0xFF| if it is set.
4238
+
4239
+
4240
+ # A.148 |SBB|: Subtract with Borrow
4241
+
4242
+ # SBB r/m8,reg8 ; 18 /r [8086]
4243
+ # SBB r/m16,reg16 ; o16 19 /r [8086]
4244
+ # SBB r/m32,reg32 ; o32 19 /r [386]
4245
+
4246
+ # SBB reg8,r/m8 ; 1A /r [8086]
4247
+ # SBB reg16,r/m16 ; o16 1B /r [8086]
4248
+ # SBB reg32,r/m32 ; o32 1B /r [386]
4249
+
4250
+ # SBB r/m8,imm8 ; 80 /3 ib [8086]
4251
+ # SBB r/m16,imm16 ; o16 81 /3 iw [8086]
4252
+ # SBB r/m32,imm32 ; o32 81 /3 id [386]
4253
+
4254
+ # SBB r/m16,imm8 ; o16 83 /3 ib [8086]
4255
+ # SBB r/m32,imm8 ; o32 83 /3 ib [386]
4256
+
4257
+ # SBB AL,imm8 ; 1C ib [8086]
4258
+ # SBB AX,imm16 ; o16 1D iw [8086]
4259
+ # SBB EAX,imm32 ; o32 1D id [386]
4260
+
4261
+ # |SBB| performs integer subtraction: it subtracts its second operand,
4262
+ # plus the value of the carry flag, from its first, and leaves the result
4263
+ # in its destination (first) operand. The flags are set according to the
4264
+ # result of the operation: in particular, the carry flag is affected and
4265
+ # can be used by a subsequent |SBB| instruction.
4266
+
4267
+ # In the forms with an 8-bit immediate second operand and a longer first
4268
+ # operand, the second operand is considered to be signed, and is
4269
+ # sign-extended to the length of the first operand. In these cases, the
4270
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
4271
+ # instruction.
4272
+
4273
+ # To subtract one number from another without also subtracting the
4274
+ # contents of the carry flag, use |SUB| (section A.159 <#section-A.159>).
4275
+
4276
+
4277
+ # A.149 |SCASB|, |SCASW|, |SCASD|: Scan String
4278
+
4279
+ # SCASB ; AE [8086]
4280
+ # SCASW ; o16 AF [8086]
4281
+ # SCASD ; o32 AF [386]
4282
+
4283
+ # |SCASB| compares the byte in |AL| with the byte at |[ES:DI]| or
4284
+ # |[ES:EDI]|, and sets the flags accordingly. It then increments or
4285
+ # decrements (depending on the direction flag: increments if the flag is
4286
+ # clear, decrements if it is set) |DI| (or |EDI|).
4287
+
4288
+ # The register used is |DI| if the address size is 16 bits, and |EDI| if
4289
+ # it is 32 bits. If you need to use an address size not equal to the
4290
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
4291
+
4292
+ # Segment override prefixes have no effect for this instruction: the use
4293
+ # of |ES| for the load from |[DI]| or |[EDI]| cannot be overridden.
4294
+
4295
+ # |SCASW| and |SCASD| work in the same way, but they compare a word to
4296
+ # |AX| or a doubleword to |EAX| instead of a byte to |AL|, and increment
4297
+ # or decrement the addressing registers by 2 or 4 instead of 1.
4298
+
4299
+ # The |REPE| and |REPNE| prefixes (equivalently, |REPZ| and |REPNZ|) may
4300
+ # be used to repeat the instruction up to |CX| (or |ECX| - again, the
4301
+ # address size chooses which) times until the first unequal or equal byte
4302
+ # is found.
4303
+
4304
+
4305
+ # A.150 |SETcc|: Set Register from Condition
4306
+
4307
+ # SETcc r/m8 ; 0F 90+cc /2 [386]
4308
+
4309
+ # |SETcc| sets the given 8-bit operand to zero if its condition is not
4310
+ # satisfied, and to 1 if it is.
4311
+
4312
+
4313
+ # A.151 |SGDT|, |SIDT|, |SLDT|: Store Descriptor Table Pointers
4314
+
4315
+ # SGDT mem ; 0F 01 /0 [286,PRIV]
4316
+ # SIDT mem ; 0F 01 /1 [286,PRIV]
4317
+ # SLDT r/m16 ; 0F 00 /0 [286,PRIV]
4318
+
4319
+ # |SGDT| and |SIDT| both take a 6-byte memory area as an operand: they
4320
+ # store the contents of the GDTR (global descriptor table register) or
4321
+ # IDTR (interrupt descriptor table register) into that area as a 16-bit
4322
+ # size limit followed by a 32-bit linear address (in that order).
4323
+ # These are the only instructions which directly use /linear/ addresses,
4324
+ # rather than segment/offset pairs.
4325
+
4326
+ # |SLDT| stores the segment selector corresponding to the LDT (local
4327
+ # descriptor table) into the given operand.
4328
+
4329
+ # See also |LGDT|, |LIDT| and |LLDT| (section A.95 <#section-A.95>).
4330
+
4331
+
4332
+ # A.152 |SHL|, |SHR|: Bitwise Logical Shifts
4333
+
4334
+ # SHL r/m8,1 ; D0 /4 [8086]
4335
+ # SHL r/m8,CL ; D2 /4 [8086]
4336
+ # SHL r/m8,imm8 ; C0 /4 ib [286]
4337
+ # SHL r/m16,1 ; o16 D1 /4 [8086]
4338
+ # SHL r/m16,CL ; o16 D3 /4 [8086]
4339
+ # SHL r/m16,imm8 ; o16 C1 /4 ib [286]
4340
+ # SHL r/m32,1 ; o32 D1 /4 [386]
4341
+ # SHL r/m32,CL ; o32 D3 /4 [386]
4342
+ # SHL r/m32,imm8 ; o32 C1 /4 ib [386]
4343
+
4344
+ # SHR r/m8,1 ; D0 /5 [8086]
4345
+ # SHR r/m8,CL ; D2 /5 [8086]
4346
+ # SHR r/m8,imm8 ; C0 /5 ib [286]
4347
+ # SHR r/m16,1 ; o16 D1 /5 [8086]
4348
+ # SHR r/m16,CL ; o16 D3 /5 [8086]
4349
+ # SHR r/m16,imm8 ; o16 C1 /5 ib [286]
4350
+ # SHR r/m32,1 ; o32 D1 /5 [386]
4351
+ # SHR r/m32,CL ; o32 D3 /5 [386]
4352
+ # SHR r/m32,imm8 ; o32 C1 /5 ib [386]
4353
+
4354
+ # |SHL| and |SHR| perform a logical shift operation on the given
4355
+ # source/destination (first) operand. The vacated bits are filled with zero.
4356
+
4357
+ # A synonym for |SHL| is |SAL| (see section A.146 <#section-A.146>). NASM
4358
+ # will assemble either one to the same code, but NDISASM will always
4359
+ # disassemble that code as |SHL|.
4360
+
4361
+ # The number of bits to shift by is given by the second operand. Only the
4362
+ # bottom 3, 4 or 5 bits (depending on the source operand size) of the
4363
+ # shift count are considered by processors above the 8086.
4364
+
4365
+ # You can force the longer (286 and upwards, beginning with a |C1| byte)
4366
+ # form of |SHL foo,1| by using a |BYTE| prefix: |SHL foo,BYTE 1|.
4367
+ # Similarly with |SHR|.
4368
+
4369
+
4370
+ # A.153 |SHLD|, |SHRD|: Bitwise Double-Precision Shifts
4371
+
4372
+ # SHLD r/m16,reg16,imm8 ; o16 0F A4 /r ib [386]
4373
+ # SHLD r/m32,reg32,imm8 ; o32 0F A4 /r ib [386]
4374
+ # SHLD r/m16,reg16,CL ; o16 0F A5 /r [386]
4375
+ # SHLD r/m32,reg32,CL ; o32 0F A5 /r [386]
4376
+
4377
+ # SHRD r/m16,reg16,imm8 ; o16 0F AC /r ib [386]
4378
+ # SHRD r/m32,reg32,imm8 ; o32 0F AC /r ib [386]
4379
+ # SHRD r/m16,reg16,CL ; o16 0F AD /r [386]
4380
+ # SHRD r/m32,reg32,CL ; o32 0F AD /r [386]
4381
+
4382
+ # |SHLD| performs a double-precision left shift. It notionally places its
4383
+ # second operand to the right of its first, then shifts the entire bit
4384
+ # string thus generated to the left by a number of bits specified in the
4385
+ # third operand. It then updates only the /first/ operand according to the
4386
+ # result of this. The second operand is not modified.
4387
+
4388
+ # |SHRD| performs the corresponding right shift: it notionally places the
4389
+ # second operand to the /left/ of the first, shifts the whole bit string
4390
+ # right, and updates only the first operand.
4391
+
4392
+ # For example, if |EAX| holds |0x01234567| and |EBX| holds |0x89ABCDEF|,
4393
+ # then the instruction |SHLD EAX,EBX,4| would update |EAX| to hold
4394
+ # |0x12345678|. Under the same conditions, |SHRD EAX,EBX,4| would update
4395
+ # |EAX| to hold |0xF0123456|.
4396
+
4397
+ # The number of bits to shift by is given by the third operand. Only the
4398
+ # bottom 5 bits of the shift count are considered.
4399
+
4400
+
4401
+ # A.154 |SMI|: System Management Interrupt
4402
+
4403
+ # SMI ; F1 [386,UNDOC]
4404
+
4405
+ # This is an opcode apparently supported by some AMD processors (which is
4406
+ # why it can generate the same opcode as |INT1|), and places the machine
4407
+ # into system-management mode, a special debugging mode.
4408
+
4409
+
4410
+ # A.155 |SMSW|: Store Machine Status Word
4411
+
4412
+ # SMSW r/m16 ; 0F 01 /4 [286,PRIV]
4413
+
4414
+ # |SMSW| stores the bottom half of the |CR0| control register (or the
4415
+ # Machine Status Word, on 286 processors) into the destination operand.
4416
+ # See also |LMSW| (section A.96 <#section-A.96>).
4417
+
4418
+
4419
+ # A.156 |STC|, |STD|, |STI|: Set Flags
4420
+
4421
+ # STC ; F9 [8086]
4422
+ # STD ; FD [8086]
4423
+ # STI ; FB [8086]
4424
+
4425
+ # These instructions set various flags. |STC| sets the carry flag; |STD|
4426
+ # sets the direction flag; and |STI| sets the interrupt flag (thus
4427
+ # enabling interrupts).
4428
+
4429
+ # To clear the carry, direction, or interrupt flags, use the |CLC|, |CLD|
4430
+ # and |CLI| instructions (section A.15 <#section-A.15>). To invert the
4431
+ # carry flag, use |CMC| (section A.16 <#section-A.16>).
4432
+
4433
+
4434
+ # A.157 |STOSB|, |STOSW|, |STOSD|: Store Byte to String
4435
+
4436
+ # STOSB ; AA [8086]
4437
+ # STOSW ; o16 AB [8086]
4438
+ # STOSD ; o32 AB [386]
4439
+
4440
+ # |STOSB| stores the byte in |AL| at |[ES:DI]| or |[ES:EDI]|; the flags
4441
+ # are not affected. It then increments or decrements (depending on the
4442
+ # direction flag: increments if the flag is clear, decrements if it is
4443
+ # set) |DI| (or |EDI|).
4444
+
4445
+ # The register used is |DI| if the address size is 16 bits, and |EDI| if
4446
+ # it is 32 bits. If you need to use an address size not equal to the
4447
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
4448
+
4449
+ # Segment override prefixes have no effect for this instruction: the use
4450
+ # of |ES| for the store to |[DI]| or |[EDI]| cannot be overridden.
4451
+
4452
+ # |STOSW| and |STOSD| work in the same way, but they store the word in
4453
+ # |AX| or the doubleword in |EAX| instead of the byte in |AL|, and
4454
+ # increment or decrement the addressing registers by 2 or 4 instead of 1.
4455
+
4456
+ # The |REP| prefix may be used to repeat the instruction |CX| (or |ECX| -
4457
+ # again, the address size chooses which) times.
4458
+
4459
+
4460
+ # A.158 |STR|: Store Task Register
4461
+
4462
+ # STR r/m16 ; 0F 00 /1 [286,PRIV]
4463
+
4464
+ # |STR| stores the segment selector corresponding to the contents of the
4465
+ # Task Register into its operand.
4466
+
4467
+
4468
+ # A.159 |SUB|: Subtract Integers
4469
+
4470
+ # SUB r/m8,reg8 ; 28 /r [8086]
4471
+ # SUB r/m16,reg16 ; o16 29 /r [8086]
4472
+ # SUB r/m32,reg32 ; o32 29 /r [386]
4473
+
4474
+ # SUB reg8,r/m8 ; 2A /r [8086]
4475
+ # SUB reg16,r/m16 ; o16 2B /r [8086]
4476
+ # SUB reg32,r/m32 ; o32 2B /r [386]
4477
+
4478
+ # SUB r/m8,imm8 ; 80 /5 ib [8086]
4479
+ # SUB r/m16,imm16 ; o16 81 /5 iw [8086]
4480
+ # SUB r/m32,imm32 ; o32 81 /5 id [386]
4481
+
4482
+ # SUB r/m16,imm8 ; o16 83 /5 ib [8086]
4483
+ # SUB r/m32,imm8 ; o32 83 /5 ib [386]
4484
+
4485
+ # SUB AL,imm8 ; 2C ib [8086]
4486
+ # SUB AX,imm16 ; o16 2D iw [8086]
4487
+ # SUB EAX,imm32 ; o32 2D id [386]
4488
+
4489
+ # |SUB| performs integer subtraction: it subtracts its second operand from
4490
+ # its first, and leaves the result in its destination (first) operand. The
4491
+ # flags are set according to the result of the operation: in particular,
4492
+ # the carry flag is affected and can be used by a subsequent |SBB|
4493
+ # instruction (section A.148 <#section-A.148>).
4494
+
4495
+ # In the forms with an 8-bit immediate second operand and a longer first
4496
+ # operand, the second operand is considered to be signed, and is
4497
+ # sign-extended to the length of the first operand. In these cases, the
4498
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
4499
+ # instruction.
4500
+
4501
+
4502
+ # A.160 |TEST|: Test Bits (notional bitwise AND)
4503
+
4504
+ # TEST r/m8,reg8 ; 84 /r [8086]
4505
+ # TEST r/m16,reg16 ; o16 85 /r [8086]
4506
+ # TEST r/m32,reg32 ; o32 85 /r [386]
4507
+
4508
+ # TEST r/m8,imm8 ; F6 /0 ib [8086]
4509
+ # TEST r/m16,imm16 ; o16 F7 /0 iw [8086]
4510
+ # TEST r/m32,imm32 ; o32 F7 /0 id [386]
4511
+
4512
+ # TEST AL,imm8 ; A8 ib [8086]
4513
+ # TEST AX,imm16 ; o16 A9 iw [8086]
4514
+ # TEST EAX,imm32 ; o32 A9 id [386]
4515
+
4516
+ # |TEST| performs a `mental' bitwise AND of its two operands, and affects
4517
+ # the flags as if the operation had taken place, but does not store the
4518
+ # result of the operation anywhere.
4519
+
4520
+
4521
+ # A.161 |UMOV|: User Move Data
4522
+
4523
+ # UMOV r/m8,reg8 ; 0F 10 /r [386,UNDOC]
4524
+ # UMOV r/m16,reg16 ; o16 0F 11 /r [386,UNDOC]
4525
+ # UMOV r/m32,reg32 ; o32 0F 11 /r [386,UNDOC]
4526
+
4527
+ # UMOV reg8,r/m8 ; 0F 12 /r [386,UNDOC]
4528
+ # UMOV reg16,r/m16 ; o16 0F 13 /r [386,UNDOC]
4529
+ # UMOV reg32,r/m32 ; o32 0F 13 /r [386,UNDOC]
4530
+
4531
+ # This undocumented instruction is used by in-circuit emulators to access
4532
+ # user memory (as opposed to host memory). It is used just like an
4533
+ # ordinary memory/register or register/register |MOV| instruction, but
4534
+ # accesses user space.
4535
+
4536
+
4537
+ # A.162 |VERR|, |VERW|: Verify Segment Readability/Writability
4538
+
4539
+ # VERR r/m16 ; 0F 00 /4 [286,PRIV]
4540
+
4541
+ # VERW r/m16 ; 0F 00 /5 [286,PRIV]
4542
+
4543
+ # |VERR| sets the zero flag if the segment specified by the selector in
4544
+ # its operand can be read from at the current privilege level. |VERW| sets
4545
+ # the zero flag if the segment can be written.
4546
+
4547
+
4548
+ # A.163 |WAIT|: Wait for Floating-Point Processor
4549
+
4550
+ # WAIT ; 9B [8086]
4551
+
4552
+ # |WAIT|, on 8086 systems with a separate 8087 FPU, waits for the FPU to
4553
+ # have finished any operation it is engaged in before continuing main
4554
+ # processor operations, so that (for example) an FPU store to main memory
4555
+ # can be guaranteed to have completed before the CPU tries to read the
4556
+ # result back out.
4557
+
4558
+ # On higher processors, |WAIT| is unnecessary for this purpose, and it has
4559
+ # the alternative purpose of ensuring that any pending unmasked FPU
4560
+ # exceptions have happened before execution continues.
4561
+
4562
+
4563
+ # A.164 |WBINVD|: Write Back and Invalidate Cache
4564
+
4565
+ # WBINVD ; 0F 09 [486]
4566
+
4567
+ # |WBINVD| invalidates and empties the processor's internal caches, and
4568
+ # causes the processor to instruct external caches to do the same. It
4569
+ # writes the contents of the caches back to memory first, so no data is
4570
+ # lost. To flush the caches quickly without bothering to write the data
4571
+ # back first, use |INVD| (section A.84 <#section-A.84>).
4572
+
4573
+
4574
+ # A.165 |WRMSR|: Write Model-Specific Registers
4575
+
4576
+ # WRMSR ; 0F 30 [PENT]
4577
+
4578
+ # |WRMSR| writes the value in |EDX:EAX| to the processor Model-Specific
4579
+ # Register (MSR) whose index is stored in |ECX|. See also |RDMSR| (section
4580
+ # A.139 <#section-A.139>).
4581
+
4582
+
4583
+ # A.166 |XADD|: Exchange and Add
4584
+
4585
+ # XADD r/m8,reg8 ; 0F C0 /r [486]
4586
+ # XADD r/m16,reg16 ; o16 0F C1 /r [486]
4587
+ # XADD r/m32,reg32 ; o32 0F C1 /r [486]
4588
+
4589
+ # |XADD| exchanges the values in its two operands, and then adds them
4590
+ # together and writes the result into the destination (first) operand.
4591
+ # This instruction can be used with a |LOCK| prefix for multi-processor
4592
+ # synchronisation purposes.
4593
+
4594
+
4595
+ # A.167 |XBTS|: Extract Bit String
4596
+
4597
+ # XBTS reg16,r/m16 ; o16 0F A6 /r [386,UNDOC]
4598
+ # XBTS reg32,r/m32 ; o32 0F A6 /r [386,UNDOC]
4599
+
4600
+ # No clear documentation seems to be available for this instruction: the
4601
+ # best I've been able to find reads `Takes a string of bits from the first
4602
+ # operand and puts them in the second operand'. It is present only in
4603
+ # early 386 processors, and conflicts with the opcodes for |CMPXCHG486|.
4604
+ # NASM supports it only for completeness. Its counterpart is |IBTS| (see
4605
+ # section A.75 <#section-A.75>).
4606
+
4607
+
4608
+ # A.168 |XCHG|: Exchange
4609
+
4610
+ # XCHG reg8,r/m8 ; 86 /r [8086]
4611
+ # XCHG reg16,r/m16 ; o16 87 /r [8086]
4612
+ # XCHG reg32,r/m32 ; o32 87 /r [386]
4613
+
4614
+ # XCHG r/m8,reg8 ; 86 /r [8086]
4615
+ # XCHG r/m16,reg16 ; o16 87 /r [8086]
4616
+ # XCHG r/m32,reg32 ; o32 87 /r [386]
4617
+
4618
+ # XCHG AX,reg16 ; o16 90+r [8086]
4619
+ # XCHG EAX,reg32 ; o32 90+r [386]
4620
+ # XCHG reg16,AX ; o16 90+r [8086]
4621
+ # XCHG reg32,EAX ; o32 90+r [386]
4622
+
4623
+ # |XCHG| exchanges the values in its two operands. It can be used with a
4624
+ # |LOCK| prefix for purposes of multi-processor synchronisation.
4625
+
4626
+ # |XCHG AX,AX| or |XCHG EAX,EAX| (depending on the |BITS| setting)
4627
+ # generates the opcode |90h|, and so is a synonym for |NOP| (section A.109
4628
+ # <#section-A.109>).
4629
+
4630
+
4631
+ # A.169 |XLATB|: Translate Byte in Lookup Table
4632
+
4633
+ # XLATB ; D7 [8086]
4634
+
4635
+ # |XLATB| adds the value in |AL|, treated as an unsigned byte, to |BX| or
4636
+ # |EBX|, and loads the byte from the resulting address (in the segment
4637
+ # specified by |DS|) back into |AL|.
4638
+
4639
+ # The base register used is |BX| if the address size is 16 bits, and |EBX|
4640
+ # if it is 32 bits. If you need to use an address size not equal to the
4641
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
4642
+
4643
+ # The segment register used to load from |[BX+AL]| or |[EBX+AL]| can be
4644
+ # overridden by using a segment register name as a prefix (for example,
4645
+ # |es xlatb|).
4646
+
4647
+
4648
+ # A.170 |XOR|: Bitwise Exclusive OR
4649
+
4650
+ # XOR r/m8,reg8 ; 30 /r [8086]
4651
+ # XOR r/m16,reg16 ; o16 31 /r [8086]
4652
+ # XOR r/m32,reg32 ; o32 31 /r [386]
4653
+
4654
+ # XOR reg8,r/m8 ; 32 /r [8086]
4655
+ # XOR reg16,r/m16 ; o16 33 /r [8086]
4656
+ # XOR reg32,r/m32 ; o32 33 /r [386]
4657
+
4658
+ # XOR r/m8,imm8 ; 80 /6 ib [8086]
4659
+ # XOR r/m16,imm16 ; o16 81 /6 iw [8086]
4660
+ # XOR r/m32,imm32 ; o32 81 /6 id [386]
4661
+
4662
+ # XOR r/m16,imm8 ; o16 83 /6 ib [8086]
4663
+ # XOR r/m32,imm8 ; o32 83 /6 ib [386]
4664
+
4665
+ # XOR AL,imm8 ; 34 ib [8086]
4666
+ # XOR AX,imm16 ; o16 35 iw [8086]
4667
+ # XOR EAX,imm32 ; o32 35 id [386]
4668
+
4669
+ # |XOR| performs a bitwise XOR operation between its two operands (i.e.
4670
+ # each bit of the result is 1 if and only if exactly one of the
4671
+ # corresponding bits of the two inputs was 1), and stores the result in
4672
+ # the destination (first) operand.
4673
+
4674
+ # In the forms with an 8-bit immediate second operand and a longer first
4675
+ # operand, the second operand is considered to be signed, and is
4676
+ # sign-extended to the length of the first operand. In these cases, the
4677
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
4678
+ # instruction.
4679
+
4680
+ # The MMX instruction |PXOR| (see section A.137 <#section-A.137>) performs
4681
+ # the same operation on the 64-bit MMX registers.
4682
+
4683
+ # Previous Chapter <nasmdo10.html> | Contents <nasmdoc0.html> | Index
4684
+ # <nasmdoci.html>