wilson 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2009-01-20
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birfday!
6
+
data/Manifest.txt ADDED
@@ -0,0 +1,6 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/wilson.rb
6
+ test/test_wilson.rb
data/README.txt ADDED
@@ -0,0 +1,60 @@
1
+ = wilson
2
+
3
+ * http://rubyforge.org/projects/seattlerb
4
+
5
+ == DESCRIPTION:
6
+
7
+ Wilson is a pure ruby x86 assembler. No, really. Worst Idea Evar.
8
+
9
+ Why "wilson"? I wanted to name it "metal", but there is an existing
10
+ project with that name... So I'm naming it after Wilson Bilkovich, who
11
+ is about as metal as you can get (and it is easier to spell than
12
+ "bilkovich", even tho that sounds more metal).
13
+
14
+ == FEATURES/PROBLEMS:
15
+
16
+ * Generates x86 machine code directly, with no dependencies.
17
+ * Still has problems with jumps, I need smart ppl to help me.
18
+
19
+ == SYNOPSIS:
20
+
21
+ class X
22
+ defasm :superfast_meaning_of_life do
23
+ eax.mov 42.r # (42 << 1) + 1
24
+ end
25
+ end
26
+
27
+ p X.new.superfast_meaning_of_life # => 42
28
+
29
+ == REQUIREMENTS:
30
+
31
+ * rubygems
32
+
33
+ == INSTALL:
34
+
35
+ * sudo gem install wilson
36
+
37
+ == LICENSE:
38
+
39
+ (The MIT License)
40
+
41
+ Copyright (c) 2008 Ryan Davis, Seattle.rb
42
+
43
+ Permission is hereby granted, free of charge, to any person obtaining
44
+ a copy of this software and associated documentation files (the
45
+ 'Software'), to deal in the Software without restriction, including
46
+ without limitation the rights to use, copy, modify, merge, publish,
47
+ distribute, sublicense, and/or sell copies of the Software, and to
48
+ permit persons to whom the Software is furnished to do so, subject to
49
+ the following conditions:
50
+
51
+ The above copyright notice and this permission notice shall be
52
+ included in all copies or substantial portions of the Software.
53
+
54
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
55
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
56
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
57
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
58
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
59
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
60
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,71 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require './lib/wilson.rb'
6
+
7
# Declare the gem through Hoe. The Hoe instance is kept in +h+ because the
# custom test tasks below call h.test_cmd on it.
h = Hoe.new('wilson', Wilson::VERSION) { |spec|
  spec.rubyforge_name = 'seattlerb'
  spec.developer 'Ryan Davis', 'ryand-ruby@zenspider.com'
}
11
+
12
class Hoe
  ##
  # Builds the command used to run the test suite.
  #
  # +flavor+ is an optional wrapper program name (eg: :zenprofile) that is
  # prefixed to the command line.
  #
  # Returns a two element array: the message to send (:sh when a flavor is
  # given, :ruby otherwise) and the command string. As a side effect
  # ENV['EXCLUDED_VERSIONS'] is set from multiruby_skip.

  def test_cmd flavor = nil
    libs     = ["rubygems", testlib] + test_globs.map { |glob| Dir.glob(glob) }.flatten
    requires = libs.map { |lib| %Q(require "#{lib}") }
    command  = "#{RUBY_FLAGS} -e '#{requires.join("; ")}' #{FILTER}"

    ENV['EXCLUDED_VERSIONS'] = multiruby_skip.join(":")

    if flavor then
      [:sh, "#{flavor} #{command}"]
    else
      [:ruby, command]
    end
  end
end
27
+
28
+ # def run_tests(multi=false) # :nodoc:
29
+ # msg = multi ? :sh : :ruby
30
+ # cmd = if test ?f, 'test/test_all.rb' then
31
+ # "#{RUBY_FLAGS} test/test_all.rb #{FILTER}"
32
+ # else
33
+ # tests = ["rubygems", self.testlib] +
34
+ # test_globs.map { |g| Dir.glob(g) }.flatten
35
+ # tests.map! {|f| %Q(require "#{f}")}
36
+ # "#{RUBY_FLAGS} -e '#{tests.join("; ")}' #{FILTER}"
37
+ # end
38
+ #
39
+ # excludes = multiruby_skip.join(":")
40
+ # ENV['EXCLUDED_VERSIONS'] = excludes
41
+ # cmd = "multiruby #{cmd}" if multi
42
+ #
43
+ # send msg, cmd
44
+ # end
45
+
46
# Extra tasks under the test: namespace.
namespace :test do
  # Runs the command built by h.test_cmd with the zenprofile flavor.
  desc "profiles your tests"
  task(:prof) { send(*h.test_cmd(:zenprofile)) }

  # Placeholder: always raises.
  desc "rcov your tests"
  task(:rcov) { raise "not yet" }
end
57
+
58
# Define an rcov task when rcov is installed; a missing rcov is ignored.
begin
  require 'rcov/rcovtask'

  Rcov::RcovTask.new { |rcov|
    # PATTERN in the environment overrides the default test file glob.
    rcov.test_files = FileList[ENV['PATTERN'] || 'test/test_*.rb']
    rcov.verbose    = true
    rcov.rcov_opts << "--threshold 80"
    rcov.rcov_opts << "--no-color"
  }
rescue LoadError
  # skip
end
71
+ # vim: syntax=Ruby
data/lib/wilson.rb ADDED
@@ -0,0 +1,4684 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'dl'
4
+ require 'dl/import'
5
+
6
##
# Bindings to the running ruby library, loaded through DL, so that
# machine code can be registered as a method body.

module Ruby
  extend DL::Importable

  # A VALUE argument is passed as the object's object_id shifted left by one.
  typealias("VALUE", "unsigned long", proc { |obj| obj.object_id << 1 })

  # NOTE(review): the library name is hard-coded with a .dylib suffix --
  # confirm whether other platforms are meant to be supported.
  dlload("libruby.dylib")

  extern("void rb_define_method(VALUE, char*, void*, int)")
end
15
+
16
class Object
  ##
  # Number used to refer to this object from assembler code; for a
  # plain object this is simply its object_id.

  def r
    object_id
  end
end
21
+
22
class Integer
  ##
  # Number used to refer to this integer from assembler code: the
  # object_id doubled, plus one.
  #
  # NOTE(review): the README annotates `42.r` as "(42 << 1) + 1", but the
  # shift here is applied to object_id rather than to the integer itself --
  # confirm which one is intended.

  def r
    object_id * 2 + 1
  end

  ##
  # Integers are displayed in hexadecimal with a "0x" prefix.

  def inspect
    "0x" + to_s(16)
  end
end
31
+
32
class Module
  # Every generated code string is kept here, so a reference to it remains
  # after defasm returns.
  @@asm = []

  ##
  # Defines instance method +name+ on this module from assembler code.
  #
  # The block is instance_eval'd against a fresh Wilson::MachineCodeX86,
  # between a generated prologue (push ebp; mov ebp, esp) and epilogue
  # (leave; ret). If the block emits no bytes, a warning is printed and
  # eax is loaded with 4 so the method returns nil. +args+ is accepted
  # but not used.

  def defasm name, *args, &block
    machine = Wilson::MachineCodeX86.new

    machine.ebp.push
    # machine.esi.push # TODO?
    # machine.edi.push
    machine.ebp.mov machine.esp

    prologue_size = machine.stream.size

    machine.instance_eval(&block)

    if machine.stream.size == prologue_size then # return nil
      warn "returning nil for #{self}##{name}"
      machine.eax.mov 4
    end

    # machine.edi.pop
    # machine.esi.pop
    # machine.ebp.pop

    machine.leave
    machine.ret

    bytes = machine.stream.pack("C*")
    @@asm << bytes

    Ruby.rb_define_method self, name.to_s, bytes, 0
  end
end
65
+
66
class Object
  ##
  # Raised by methods that a subclass is expected to override.

  def subclass_responsibility
    raise "subclass responsibility"
  end

  ##
  # Default answer for every operand-kind predicate below.

  def no!
    false
  end

  # Every object answers false to these unless its class overrides them.
  [:address?, :future_label?, :immediate?, :immediate_value?, :label?,
   :offset?, :operand?, :register?, :special_register?].each do |predicate|
    alias_method predicate, :no!
  end
end
80
+
81
class Integer
  ##
  # Treats this integer as a memory location: returns a new
  # Wilson::Address whose offset is this integer.

  def m
    location = Wilson::Address.new
    location.offset = self
    location
  end

  ##
  # Integers can be used directly as immediate values.

  def immediate_value?
    true
  end
end
92
+
93
class Array
  ##
  # The element at index 1, or nil when there is none.

  def second
    at(1)
  end

  ##
  # Appends +integer+ as four bytes, least significant byte first.
  # Returns self.

  def push_D integer
    concat([integer].pack("V").unpack("C*"))
  end

  ##
  # Appends the low byte of +integer+. Returns self.

  def push_B integer
    push(integer & 255)
  end

  ##
  # Appends the low two bytes of +integer+, least significant byte
  # first. Returns self.

  def push_W integer
    low  = integer & 255
    high = (integer >> 8) & 255
    push(low, high)
  end
end
110
+
111
+ module Wilson
112
+ VERSION = '1.0.0'
113
+
114
+ ##
115
+ # Assembler parses the NASM documentation and creates Command
116
+ # objects for it
117
+
118
class Assembler
  # The Command objects built so far.
  attr_accessor :commands

  # Lazily built shared instance, see Assembler.default.
  @@default = nil

  ##
  # Extra instruction definitions, in the same line format as the main
  # table, that are parsed after it. Each line is stripped before it is
  # parsed, so the indentation inside the literal is not significant.

  def self.nasm_fixes
    # TODO: extend parser to split /[,:]/ and remove some of these
    '
    CALL imm,imm16 ; o16 9A iw iw [8086]
    CALL imm,imm32 ; o32 9A id iw [386]
    CALLFAR mem16 ; o16 FF /3 [8086]
    CALLFAR mem32 ; o32 FF /3 [386]

    Jcc imm ; 0F 80+cc rw/rd [386]

    JMP imm,imm16 ; o16 EA iw iw [8086]
    JMP imm,imm32 ; o32 EA id iw [386]
    JMP imm16 ; E9 rw/rd [8086]
    JMP imm32 ; E9 rw/rd [8086]
    JMP imm8 ; EB rb [8086]
    JMPFAR mem16 ; o16 FF /5 [8086]
    JMPFAR mem32 ; o32 FF /5 [386]

    FADDTO fpureg ; DC C0+r [8086,FPU]
    FDIVTO fpureg ; DC F8+r [8086,FPU]
    FDIVRTO fpureg ; DC F0+r [8086,FPU]
    FMULTO fpureg ; DC C8+r [8086,FPU]
    FSUBTO fpureg ; DC E8+r [8086,FPU]
    FSUBRTO fpureg ; DC E0+r [8086,FPU]
    '
  end

  ##
  # The instruction table text: whatever follows the last end marker in
  # this source file.

  def self.nasm
    File.read(__FILE__).split(/__END__/).last
  end

  ##
  # The shared, already parsed assembler; built on first use.

  def self.default
    @@default ||= self.new.parse
  end

  def self.default= o
    @@default = o
  end

  ##
  # Commands of the shared assembler.

  def self.commands
    default.commands
  end

  ##
  # A fresh assembler with the whole table parsed.

  def self.parse
    new.parse
  end

  def initialize
    self.commands = []
  end

  ##
  # Splits every still-textual "r/mN" parameter of +command+ in two: the
  # command itself gets a register parameter (MemoryRegister for 8, 16
  # and 32 bits, MMXRegister for 64) and a copy, appended to commands,
  # gets an Address of the same width.

  def expand_parameters command
    command.parameters.each_with_index do |parameter, position|
      next unless String === parameter && parameter =~ /^r\/m(\d+)/

      width = $1.to_i
      memory_form = command.dup
      commands << memory_form

      register_class = case width
                       when 8, 16, 32 then MemoryRegister
                       when 64        then MMXRegister
                       end
      next unless register_class

      command.parameters[position]     = register_class.new width
      memory_form.parameters[position] = Address.new false, width
    end
  end

  ##
  # Expands a "...cc" +prototype+ into one command per condition: the
  # trailing "cc" of the opcode is replaced by the condition name and
  # every "XX+cc" opcode byte has the condition value added to it.

  def add_conditional_commands prototype
    prototype.opcode = prototype.opcode[0..-3]

    conditionals.each do |suffix, value|
      variant = prototype.dup
      variant.opcode += suffix

      variant.opcodes.map! { |op|
        op =~ /(.*)\+cc$/ ? ($1.hex + value).to_s(16) : op
      }

      add_command variant
    end
  end

  ##
  # Parses +line+ if it looks like an instruction definition; any other
  # line is ignored.

  def process_line line # TODO: remove
    return if line.empty? || line !~ /^[A-Z].+;.*\[/

    parse_command line
  end

  ##
  # Registers +command+, expanding conditional opcodes and "r/m"
  # parameters on the way.

  def add_command command
    if command.opcode =~ /cc$/i then
      add_conditional_commands command
    else
      commands << command
      expand_parameters command
    end
  end

  ##
  # Condition name => value added to a "+cc" opcode byte.

  def conditionals
    @conditionals ||= {
      'O'   => 0,  'NO'  => 1,  'B'  => 2,  'C'   => 2,  'NAE' => 2,
      'AE'  => 3,  'NB'  => 3,  'NC' => 3,  'E'   => 4,  'Z'   => 4,
      'NE'  => 5,  'NZ'  => 5,  'BE' => 6,  'NA'  => 6,  'A'   => 7,
      'NBE' => 7,  'S'   => 8,  'NS' => 9,  'P'   => 10, 'PE'  => 10,
      'NP'  => 11, 'PO'  => 11, 'L'  => 12, 'NGE' => 12, 'GE'  => 13,
      'NL'  => 13, 'LE'  => 14, 'NG' => 14, 'G'   => 15, 'NLE' => 15,
    }
  end

  ##
  # Builds a Command from a "NAME params ; opcodes [processors]" line
  # and registers it. Raises when the line does not have that shape.

  def parse_command line
    unless line =~ /^(\w+)\s+([^;]*)\s+;\s+([^\[]+)\s+\[([\w,]+)\]/ then
      raise "unparsed: #{line}"
    end

    name, params, ops, procs = $1, $2, $3, $4

    command = Command.new
    command.opcode     = name
    command.opcodes    = ops.split
    command.processors = procs.split(/,/)

    command.initialize_parameters params.strip

    add_command command
  end

  ##
  # Parses the instruction table followed by nasm_fixes, one line at a
  # time, after stripping each line and a leading "# ". Returns self.

  def parse
    table = self.class.nasm + self.class.nasm_fixes
    table.each_line { |raw| process_line raw.strip.sub(/^# /, '') }
    self
  end
end
255
+
256
+ ##
257
+ # Command is a potential command you can call. It has an
258
+ # opcode (eg: MOV) and the memory format that it outputs as
259
+ # (opcodes) as well as the kinds of parameters it takes and the
260
+ # processor types that support the command.
261
+
262
class Command
  # opcode:     mnemonic, eg "MOV"
  # parameters: parameter prototypes built by to_parameter
  # opcodes:    encoding codes, eg ["o32", "89", "/r"]
  # processors: processor names that support this command
  attr_accessor :opcode, :parameters, :opcodes, :processors

  ##
  # Copy with its own parameters and opcodes arrays, so that a variant
  # can be modified without touching the original.

  def dup
    copy = super
    copy.parameters = copy.parameters.dup
    copy.opcodes = copy.opcodes.dup
    copy
  end

  ##
  # Does the actual operand +a+ fit the parameter prototype +b+?
  # Prototypes still in textual form never match.

  # TODO: learn this better, and figure out why not polymorphic ==
  def parameter_matches a, b
    return false if String === b

    if a.register? && b.register? then
      return a.bits == b.bits && (b.id.nil? || a.id == b.id)
    end

    if a.address? && b.address? then
      return ! b.offset? || a.offset?
    end

    if a.special_register? && b.special_register? then
      return a.class == b.class && (b.id.nil? || a.id == b.id)
    end

    return false unless b.immediate?

    if a.immediate_value? then
      # A prototype carrying a literal value (the "1" of the shift-by-one
      # forms) only accepts exactly that value; otherwise an immediate 0
      # would be encoded as a shift by one.
      return b.value == a if b.value
      return b.bits.nil? || a < (2 ** b.bits)
    end

    if a.label? then
      return a.future_label? ? b.bits == a.machine.bits :
        a.bits <= (b.bits || a.machine.bits)
    end

    false
  end

  ##
  # True when +instruction+ has this command's opcode and every one of
  # its operands matches the corresponding parameter prototype.

  def instruction_applies? instruction
    return false if instruction.opcode != self.opcode
    return false if instruction.parameters.size != self.parameters.size

    instruction.parameters.zip(self.parameters).all? { |a, b|
      self.parameter_matches a, b
    }
  end

  ##
  # Converts one textual parameter from the instruction table into a
  # prototype operand. Forms that are handled elsewhere are returned
  # unchanged as strings; unknown forms are returned unchanged after a
  # warning.

  def to_parameter parameter
    case parameter
    when 'r/m8', 'r/m16', 'r/m32', 'r/m64' then
      return parameter # "Expanded by the parser"
    when 'TO fpureg', 'SHORT imm', 'FAR mem', 'FAR mem16', 'FAR mem32',
         'NEAR imm', 'imm:imm16', 'imm:imm32' then
      return parameter # "Fixed in nasm_fixes"
    when '1' then
      # Literal operand: record the value as well as the width so that
      # parameter_matches can require exactly 1.
      one = Immediate.new(1)
      one.value = 1
      return one
    when 'AL'            then return Register.on_id_bits(nil, 0, 8)
    when 'AX'            then return Register.on_id_bits(nil, 0, 16)
    when 'EAX'           then return Register.on_id_bits(nil, 0, 32)
    when 'CL'            then return Register.on_id_bits(nil, 1, 8)
    when 'CX'            then return Register.on_id_bits(nil, 1, 16)
    when 'ECX'           then return Register.on_id_bits(nil, 1, 32)
    when 'DL'            then return Register.on_id_bits(nil, 2, 8)
    when 'DX'            then return Register.on_id_bits(nil, 2, 16)
    when 'EDX'           then return Register.on_id_bits(nil, 2, 32)
    when 'BL'            then return Register.on_id_bits(nil, 3, 8)
    when 'BX'            then return Register.on_id_bits(nil, 3, 16)
    when 'EBX'           then return Register.on_id_bits(nil, 3, 32)
    when 'ES'            then return SegmentRegister.on_id(nil, 0)
    when 'CS'            then return SegmentRegister.on_id(nil, 1)
    when 'SS'            then return SegmentRegister.on_id(nil, 2)
    when 'DS'            then return SegmentRegister.on_id(nil, 3)
    when 'FS'            then return SegmentRegister.on_id(nil, 4)
    when 'GS'            then return SegmentRegister.on_id(nil, 5)
    when 'imm'           then return Immediate.new
    when 'imm8'          then return Immediate.new(8)
    when 'imm16'         then return Immediate.new(16)
    when 'imm32'         then return Immediate.new(32)
    when 'segreg'        then return SegmentRegister.new
    when 'reg'           then return Register.new
    when 'reg8'          then return Register.new(8)
    when 'reg16'         then return Register.new(16)
    when 'reg32'         then return Register.new(32)
    when 'mem'           then return Address.new(false, 4)
    when 'mem8'          then return Address.new(false, 8)
    when 'mem16'         then return Address.new(false, 16)
    when 'mem32'         then return Address.new(false, 32)
    when 'mem64'         then return Address.new(false, 64)
    when 'mem80'         then return Address.new(false, 80)
    when 'memoffs8'      then return Address.new(true, 8)
    when 'memoffs16'     then return Address.new(true, 16)
    when 'memoffs32'     then return Address.new(true, 32)
    when 'fpureg'        then return FPURegister.new
    when /ST(.*)/        then return FPURegister.new($1.to_i)
    when 'mmxreg'        then return MMXRegister.new
    when /MM(.*)/        then return MMXRegister.new($1.to_i)
    when 'CR0/2/3/4'     then return ControlRegister.new
    when 'DR0/1/2/3/6/7' then return DebugRegister.new
    when 'TR3/4/5/6/7'   then return TestRegister.new
    else
      warn "unknown parameter: #{parameter.inspect}"
      return parameter
    end
  end

  ##
  # Sets parameters from the comma separated list +params+.

  def initialize_parameters params
    self.parameters = params.split(/,/).map { |s| self.to_parameter s }
  end

  ##
  # Encodes +instruction+ with this command and returns the bytes as a
  # new array.

  def assemble instruction
    stream = []

    opcodes.each_with_index do |each, index|
      # Integer division: the position is 1 for the last code, 0 before it.
      self.execute_instruction_position_on(each, instruction,
                                           (index + 1) / opcodes.size, stream)
    end

    stream
  end

  ##
  # Appends to +stream+ whatever the single encoding code +byte+ stands
  # for. +position+ is 1 when +byte+ is the last code of the command.
  # Codes that are not implemented raise.

  def execute_instruction_position_on(byte, instruction, position, stream)
    case byte
    when 'a16', 'a32' then
      raise "not done yet"
    when 'o16' then
      return self.align16_on(instruction, stream)
    when 'o32' then
      return self.align32_on(instruction, stream)
    when 'ib' then
      return stream.push_B(instruction.theImmediate)
    when 'iw' then
      return stream.push_W(instruction.theSecondImmediate) if position == 1
      return stream.push_W(instruction.theImmediate)
    when 'id' then
      return stream.push_D(instruction.theSecondImmediate) if position == 1
      return stream.push_D(instruction.theImmediate)
    when 'rb' then
      return self.relative_b_on(instruction, stream)
    when 'rw' then
      return self.relative_w_on(instruction, stream)
    when 'rw/rd' then
      return self.relative_w_on(instruction, stream) if
        instruction.machine.bits == 16
      return self.relative_d_on(instruction, stream)
    when 'rd' then
      return self.relative_d_on(instruction, stream)
    when 'ow' then
      raise byte
      # [^stream push_W: instruction theAddress offset].
    when 'od' then
      raise byte
      # [^stream push_D: instruction theAddress offset].
    when 'ow/od' then
      # Exactly one offset is emitted: a word on a 16 bit machine, a
      # dword otherwise.
      offset = instruction.theAddress.offset
      return stream.push_W(offset) if instruction.machine.bits == 16
      return stream.push_D(offset)
    when /^\/(.*)/ then
      return self.modrm_instruction_on($1, instruction, stream)
    end

    # Plain hex byte; an "XX+r" code has a register id added to it.
    number = byte.hex
    number += instruction.parameters[parameters.first.id ? 1 : 0].id if
      byte =~ /r$/
    stream << number
  end

  ##
  # If we get here, there will be at least two parameters to combine
  # a memory address with a register or a register with a register

  def modrm_r_on instruction, stream
    first, second = instruction.first, instruction.second

    if first.register? && second.register? then
      if parameters.first.memory_register? then
        return first.push_mod_rm_on(second, stream)
      else
        return second.push_mod_rm_on(first, stream)
      end
    end

    return second.push_mod_rm_on(first, stream) if first.special_register?
    return first.push_mod_rm_on(second, stream) if second.special_register?

    address, register =
      if first.register? && second.respond_to?(:push_mod_rm_on) then
        [second, first]
      else
        [first, second]
      end

    address.push_mod_rm_on register, stream
  end

  ##
  # "o16": emits the operand-size prefix unless the machine is 16 bit.

  def align16_on instruction, stream
    stream << 0x66 if instruction.machine.bits != 16
  end

  ##
  # Emits a relative jump distance with the Array push method +msg+.
  # +dist+ is the size of the jump instruction itself. For a future
  # label the current stream position is recorded on the label and
  # +dist+ is emitted as a placeholder.

  def relative_x_on instruction, stream, msg, dist
    offset = instruction.first
    offset = offset.offset if offset.offset?

    if offset.label? then
      if offset.future_label? then
        offset.add instruction.machine.stream.size
        return stream.send(msg, dist)
      end
      offset = offset.position
    end

    stream.send(msg, -(instruction.machine.stream.size - offset + dist))
  end

  def relative_b_on instruction, stream
    relative_x_on instruction, stream, :push_B, 2
  end

  def relative_d_on instruction, stream
    relative_x_on instruction, stream, :push_D, 5
  end

  def relative_w_on instruction, stream
    relative_x_on instruction, stream, :push_W, 3
  end

  ##
  # "/N": the first operand supplies the mod r/m byte, with +id+ in the
  # register field.

  def modrm_n_instruction_on id, instruction, stream
    instruction.first.push_mod_rm_on Register.on_id_bits(instruction.machine, id, instruction.first.bits), stream
  end

  ##
  # "o32": emits the operand-size prefix unless the machine is 32 bit.
  # This is the same 0x66 prefix as for "o16"; 0x67 is the address-size
  # prefix and belongs to "a16"/"a32".

  def align32_on instruction, stream
    stream << 0x66 if instruction.machine.bits != 32
  end

  ##
  # Dispatches a "/r" or "/N" code.

  def modrm_instruction_on byte, instruction, stream
    if byte == "r" then
      self.modrm_r_on instruction, stream
    else
      self.modrm_n_instruction_on byte.to_i, instruction, stream
    end
  end
end
518
+
519
+ ##
520
+ # Instruction is an instruction shape that we're going to match to
521
+ # Commands to find out what we should write in to memory.
522
+
523
class Instruction
  attr_accessor :opcode, :machine, :parameters

  def self.on_message machine, message # TODO: remove
    new message, machine
  end

  ##
  # +message+ is [name, *operands]. The opcode is the upcased name.
  # Without a +machine+, the machine of the second operand is used.
  # Proc operands are called and the result's #m takes their place, and
  # every operand that answers true to operand? is given this
  # instruction's machine.

  def initialize message, machine
    name, *operands = message

    self.machine    = machine
    self.parameters = operands
    self.opcode     = name.to_s.upcase

    self.machine = parameters[1].machine unless machine

    parameters.map! { |operand| Proc === operand ? operand.call.m : operand }

    parameters.each do |operand|
      operand.machine = self.machine if operand.operand?
    end
  end

  def first
    parameters.first
  end

  def second
    parameters.second
  end

  ##
  # The first operand that is an address, if any.

  def theAddress
    parameters.find { |operand| operand.address? }
  end

  ##
  # Encodes this instruction with every applicable command of the
  # machine and appends the preferred encoding to the machine's stream:
  # the shortest one, ties broken by the first and then second byte.
  # Returns false, appending nothing, when no command applies; true
  # otherwise.

  def assemble
    candidates = machine.instructions.select { |command|
      command.instruction_applies? self
    }

    return false if candidates.empty?

    encodings = candidates.map { |command| command.assemble self }

    preferred = encodings.sort_by { |bytes|
      [bytes.size, (bytes[0] || 0), (bytes[1] || 0)]
    }.first

    machine.stream.push(*preferred)

    true
  end

  ##
  # The first operand that is an immediate value, if any.

  def theSecondImmediate
    parameters.find { |operand| operand.immediate_value? }
  end

  ##
  # The last operand that is an immediate value, if any.

  def theImmediate
    parameters.reverse.find { |operand| operand.immediate_value? }
  end
end
580
+
581
+ ##
582
+ # MachineCode is an abstract machine that has subclasses for each
583
+ # concrete machine type that you can write assembly language for.
584
+ # Right now this library only supports X86, so look at
585
+ # MachineCodeX86 for more details on how to use it.
586
+
587
class MachineCode
  # stream:             the bytes assembled so far
  # bits:               word size of the machine
  # cachedInstructions: memo for #instructions
  attr_accessor :stream, :procedure, :bits, :cachedInstructions
  attr_reader :processors

  ##
  # Sets the defaults supplied by the subclass (defaultBits,
  # defaultProcessors), starts with an empty stream and then calls
  # setupMachine.

  def initialize
    self.procedure = nil
    self.bits = self.defaultBits
    self.processors = self.defaultProcessors
    self.stream = []

    self.setupMachine
  end

  def inspect
    "#{self.class}#{stream.inspect}"
  end

  ##
  # Changing the supported processors invalidates the cached
  # instruction list.

  def processors= o
    @processors = o
    @cachedInstructions = nil
  end

  ##
  # Changing the full instruction list invalidates the cached,
  # processor-filtered list as well, so #instructions never answers from
  # a previous list.

  def instructions= o
    @cachedInstructions = nil
    @instructions = o
  end

  ##
  # True when at least one of +instructionProcessors+ is supported.

  def supportsProcessor instructionProcessors
    processors.any? { |e| instructionProcessors.include? e }
  end

  ##
  # The instructions supported by the current processors (cached).

  def instructions
    self.cachedInstructions ||= @instructions.select { |e|
      self.supportsProcessor e.processors
    }
  end

  ##
  # Any unknown message is treated as an instruction without a receiver
  # operand, eg: asm.ret. Messages that do not assemble are passed on to
  # the normal method_missing.

  def method_missing msg, *args
    super unless self.instructionFromMessage(msg, *args).assemble
  end

  def instructionFromMessage msg, *args
    Instruction.on_message self, [msg, *args]
  end

  ##
  # A label at the current end of the stream.

  def label
    Label.on_at(self, stream.size)
  end

  ##
  # A label whose position will be decided later.

  def future_label
    FutureLabel.on self
  end

  def assemble instruction
    raise "no"
    # aBlock on: MessageNotUnderstood do: [:ex |
    # ex originator class = BlockClosure ifFalse: [ex pass].
    # ex resume: (ex originator value m perform: ex parameter selector withArguments: ex parameter arguments)]
  end

  # Hooks every concrete machine has to provide. They delegate at call
  # time, so this class does not depend on subclass_responsibility being
  # defined before it is loaded.
  def setupMachine;      subclass_responsibility end
  def platform;          subclass_responsibility end
  def defaultBits;       subclass_responsibility end
  def defaultProcessors; subclass_responsibility end
end
647
+
648
+ ##
649
+ # MachineCodeX86 is a concrete implementation of a machine to create
650
+ # X86 assembly code on.
651
+ #
652
+ # You can use this class in two ways:
653
+ #
654
+ # a) you can instantiate an instance and use its register variables
655
+ # to build up machine code in the @stream variable and then use
656
+ # those bytes in any way that you see fit, or
657
+ #
658
+ # b) you can make a subclass of this class much like you do with
659
+ # ExternalInterface and put methods on the class that will
660
+ # compile in to assembler code that can be called from Smalltalk
661
+ # code
662
+ #
663
+ # == Using MachineCodeX86 for scripting
664
+ #
665
+ # This is the long hand way of writing assembly code, since you
666
+ # always include a receiver with every command.
667
+ #
668
+ # asm = Wilson::MachineCodeX86.new
669
+ #
670
+ # Once you have an assembler, you can access the registers and send
671
+ # commands to them, eg:
672
+ #
673
+ # asm.eax.mov 1
674
+ #
675
+ # As you send the commands, the @stream will build up containing the
676
+ # X86 assembler bytes you can use. You can use memory addresses in
677
+ # your assembler code with the #m method, eg:
678
+ #
679
+ # asm.eax.m.mov 1
680
+ #
681
+ # Once you are finished, you simply send:
682
+ #
683
+ # asm.stream
684
+ #
685
+ # This will return you the stream of bytes.
686
+ #
687
+ # == Labels & Jumps
688
+ #
689
+ # You can do labels and jump to them using two different label
690
+ # commands. The first is #label, which places a label jump point
691
+ # immediately on call, eg:
692
+ #
693
+ # label = asm.label
694
+ # label.jmp
695
+ #
696
+ # The other is a future label that can be placed at some future
697
+ # point in the program and jumped to
698
+ #
699
+ # label = asm.future_label
700
+ # asm.eax.xor asm.eax
701
+ # label.jmp
702
+ # asm.eax.inc
703
+ # label.plant
704
+ #
705
+ # You #plant the future label where you want it to actually be and
706
+ # past references to it will be updated. Future labels will always
707
+ # use a dword jmp so that there's space to fill in the command if
708
+ # the jmp ends up being far.
709
+
710
class MachineCodeX86 < MachineCode
  # registers-general-32bit
  attr_accessor :eax, :ebx, :ebp, :esp, :edi, :esi, :ecx, :edx

  # registers-fpu
  attr_accessor :st0, :st1, :st2, :st3, :st4, :st5, :st6, :st7

  # registers-debug
  attr_accessor :dr0, :dr1, :dr2, :dr3, :dr6, :dr7

  # registers-segment
  attr_accessor :es, :ss, :cs, :gs, :fs, :ds

  # registers-test
  attr_accessor :tr3, :tr4, :tr5, :tr6, :tr7

  # registers-general-8bit
  attr_accessor :al, :ah, :bl, :bh, :cl, :ch, :dl, :dh

  # registers-general-16bit
  attr_accessor :ax, :bx, :cx, :dx, :sp, :bp, :si, :di

  # registers-control
  attr_accessor :cr0, :cr2, :cr3, :cr4

  # registers-mmx
  attr_accessor :mm0, :mm1, :mm2, :mm3, :mm4, :mm5, :mm6, :mm7

  def platform
    'i386'
  end

  def defaultBits
    32
  end

  def defaultProcessors
    %w(8086 186 286 386 486 PENT P6 CYRIX FPU MMX PRIV UNDOC)
  end

  ##
  # Takes the instruction list from the shared Assembler and creates
  # every register object.

  def setupMachine
    self.instructions = Assembler.commands

    self.setup8BitRegisters
    self.setup16BitRegisters
    self.setup32BitRegisters
    self.setupSegmentRegisters
    self.setupControlRegisters
    self.setupTestRegisters
    self.setupDebugRegisters
    self.setupFPURegisters
    self.setupMMXRegisters
  end

  # Each setup method below assigns its registers in ascending id order
  # through the attribute writers and answers the last register assigned.

  ##
  # al, cl, dl, bl, ah, ch, dh, bh get ids 0 to 7.

  def setup8BitRegisters
    names = %w(al cl dl bl ah ch dh bh)
    (0..7).map { |n|
      send("#{names[n]}=", Register.on_id_bits(self, n, 8))
    }.last
  end

  ##
  # ax, cx, dx, bx, sp, bp, si, di get ids 0 to 7.

  def setup16BitRegisters
    names = %w(ax cx dx bx sp bp si di)
    (0..7).map { |n|
      send("#{names[n]}=", Register.on_id_bits(self, n, 16))
    }.last
  end

  ##
  # eax, ecx, edx, ebx, esp, ebp, esi, edi get ids 0 to 7.

  def setup32BitRegisters
    names = %w(eax ecx edx ebx esp ebp esi edi)
    (0..7).map { |n|
      send("#{names[n]}=", Register.on_id_bits(self, n, 32))
    }.last
  end

  ##
  # es, cs, ss, ds, fs, gs get ids 0 to 5.

  def setupSegmentRegisters
    names = %w(es cs ss ds fs gs)
    (0..5).map { |n|
      send("#{names[n]}=", SegmentRegister.on_id(self, n))
    }.last
  end

  ##
  # cr0, cr2, cr3, cr4: the id is the number in the name.

  def setupControlRegisters
    [0, 2, 3, 4].map { |n|
      send("cr#{n}=", ControlRegister.on_id(self, n))
    }.last
  end

  ##
  # tr3 to tr7: the id is the number in the name.

  def setupTestRegisters
    (3..7).map { |n|
      send("tr#{n}=", TestRegister.on_id(self, n))
    }.last
  end

  ##
  # dr0 to dr3, dr6, dr7: the id is the number in the name.

  def setupDebugRegisters
    [0, 1, 2, 3, 6, 7].map { |n|
      send("dr#{n}=", DebugRegister.on_id(self, n))
    }.last
  end

  ##
  # st0 to st7: the id is the number in the name.

  def setupFPURegisters
    (0..7).map { |n|
      send("st#{n}=", FPURegister.on_id(self, n))
    }.last
  end

  ##
  # mm0 to mm7: the id is the number in the name.

  def setupMMXRegisters
    (0..7).map { |n|
      send("mm#{n}=", MMXRegister.on_id(self, n))
    }.last
  end
end
852
+
853
+ ##
854
+ # Operand is any kind of operand used in a command or instruction,
855
+ # eg: registers, memory addresses, labels, immediates, etc.
856
+
857
class Operand
  attr_accessor :machine, :bits

  ##
  # A default-constructed operand that belongs to +machine+.

  def self.on machine
    operand = new
    operand.machine = machine
    operand
  end

  # TODO: fix _all_ initialize methods from here down to have cleaner args
  def initialize bits = nil, machine = nil
    @bits, @machine = bits, machine
  end

  ##
  # Any unknown message is treated as an instruction with this operand
  # as its first parameter, eg: eax.mov 1. Messages that do not assemble
  # are passed on to the normal method_missing.

  def method_missing msg, *args, &b
    assembled = instructionFromMessage(msg, *args, &b).assemble
    super unless assembled
  end

  ##
  # Builds the Instruction for +msg+: this operand first, then +args+,
  # then the block (as a Proc) when one is given.

  def instructionFromMessage msg, *args, &b
    message = [msg, self, *args]
    message << b if b
    Instruction.on_message machine, message
  end

  def operand?
    true
  end
end
884
+
885
+ ##
886
+ # Immediate is an Integer wrapper so that we know the machine we're
887
+ # dealing with when we apply commands
888
+
889
class Immediate < Operand
  # Optional literal value carried by this immediate.
  attr_accessor :value

  ##
  # Marks this operand as an immediate.

  def immediate?() true end
end
896
+
897
+ ##
898
+ # Address is a memory address in one of the following example forms:
899
+ #
900
+ # eax, ebx + ecx, eax + 5, 23545, edx + eax + 2312
901
+
902
class Address < Operand
  # id:     id of the base register, nil for a bare offset
  # index:  optional index register
  # offset: displacement added to the base register
  attr_accessor :id, :index
  attr_reader :offset
  attr_writer :isAssemblerOffset # FIX

  ##
  # An address on +machine+ with base register id +id+ and +offset+.

  def self.on_id_offset machine, id, offset
    address = self.new
    address.machine = machine
    address.id = id
    address.offset = offset
    address
  end

  def initialize isAssemblerOffset = nil, bits = nil, id = nil
    super(bits)

    self.isAssemblerOffset = isAssemblerOffset
    self.id = id

    self.index = self.offset = nil
  end

  ##
  # Explicit width when one was given, the machine's width otherwise.

  def bits
    super || self.machine.bits
  end

  ##
  # Assigning a register stores it as the index and resets the offset
  # to 0; anything else is stored as the offset itself.

  def offset= obj
    if obj.register? then
      @offset = 0
      self.index = obj
    else
      @offset = obj
    end
  end

  ##
  # Adds an index register or an offset to this address. Note that this
  # modifies the receiver and returns it.

  def + o # TODO: this seems totally and completely wrong
    if o.register? then
      self.index = o
    else
      self.offset = o
    end
    self
  end

  def address?
    true
  end

  ##
  # The explicit isAssemblerOffset flag when one was given; otherwise
  # true exactly when there is no base register.

  def offset?
    @isAssemblerOffset.nil? ? id.nil? : @isAssemblerOffset
  end

  ##
  # Appends the mod r/m byte for this address to +stream+, with
  # +spareRegister+ in the register field, followed by the SIB byte
  # (when there is an index register) and the displacement.
  #
  # The displacement is a sign-extended byte when the offset fits in
  # -128..127, and a dword otherwise; the mod bits and the number of
  # displacement bytes emitted are always derived from the same test.
  # With an index register the mod bits stay in the mod r/m byte and
  # the SIB byte is index << 3 | base with a scale factor of 1.
  #
  # NOTE(review): the ebp-as-base-with-zero-offset and esp-as-base
  # special cases of the encoding are not handled here -- confirm
  # against the processor manual before relying on them.

  def push_mod_rm_on spareRegister, stream
    reg = spareRegister.id << 3

    if id.nil? then
      # No base register: mod 00, r/m 101, absolute dword offset.
      stream << (0b00000101 + reg)
      return stream.push_D(offset)
    end

    short = (-128..127).include?(offset)

    mod = if offset == 0 then
            0b00000000
          elsif short then
            0b01000000
          else
            0b10000000
          end

    if index.nil? then
      stream << (mod + reg + id)
    else
      stream << (mod + reg + 0b00000100) # r/m 100: a SIB byte follows
      stream << ((index.id << 3) + id)
    end

    return self if offset == 0
    return stream.push_B(offset) if short

    stream.push_D offset
  end

  ##
  # An address already is a memory reference.

  def m
    self
  end
end
988
+
989
+ ##
990
+ # Register is a general X86 register, such as eax, ebx, ecx, edx,
991
+ # etc...
992
+
993
class Register < Operand
  # Register number used when encoding.
  attr_accessor :id

  # Alternate constructor taking the arguments in machine/id/bits order.
  def self.on_id_bits(machine, id, bits)
    new(bits, machine, id)
  end

  def initialize(bits = nil, machine = nil, id = nil)
    super(bits, machine)
    self.id = id
  end

  def memory_register?; false; end

  def register?; true; end

  # Issues two mov messages on this register: first with +address+, then
  # with a block returning the register itself.
  def get(address) # TODO: test
    self.mov address
    self.mov { self }
  end

  # Appends a single byte to +stream+: 0b11 in the top two bits, the
  # spare register's id shifted into bits 3-5, and this register's id in
  # the low bits.
  def push_mod_rm_on(spareRegister, stream)
    stream << (0b11000000 + (spareRegister.id << 3) + id)
  end

  # Memory operand based on this register with a zero offset.
  def m
    self + 0
  end

  # Address below this register: same as adding the negated offset.
  def -(offset)
    self + (-offset)
  end

  # Address using this register's id as the base plus +offset+.
  def +(offset)
    Address.on_id_offset(machine, id, offset)
  end
end
1034
+
1035
+ ##
1036
+ # MemoryRegister is a regular Register, but the parser needs to know
1037
+ # if it is a primary or secondary register. This form is a private
1038
+ # secondary register. Use Register instead of this guy.
1039
+
1040
class MemoryRegister < Register
  # Overrides Register's answer (false).
  def memory_register?; true; end
end
1045
+
1046
+ ##
1047
+ # Label is a known point in the byte stream that we can jmp/loop back to.
1048
+
1049
class Label < Operand
  # Index into the machine's stream that this label marks.
  attr_accessor :position

  # Builds a label on +machine+ at +position+.
  def self.on_at(machine, position)
    marker = new
    marker.machine = machine
    marker.position = position
    marker
  end

  # Width class (8, 16 or 32) chosen from how far the current end of the
  # machine's stream is from this label's position.
  #
  # NOTE(review): the 8-bit cutoff is an unsigned 256; if this selects a
  # signed relative-jump form the reachable range is smaller -- confirm
  # against the code that consumes this value.
  def bits
    distance = machine.stream.size - position

    return 8  if distance < 256
    return 16 if distance < 65536

    32
  end

  def label?; true; end
end
1075
+
1076
+ ##
1077
+ # FutureLabel is a label in memory that hasn't been defined yet and
1078
+ # will go back and fill in the appropriate memory bytes later
1079
+
1080
class FutureLabel < Label
  # Stream positions recorded via #add that get patched by #plant.
  attr_accessor :positions

  def initialize
    super
    self.positions = []
  end

  # Fixes this label at the current end of the machine's stream, then
  # rewrites the bytes after every recorded position.
  #
  # For each recorded position the value one slot past it is read as a
  # size marker (2, 3 or 5), and the distance from the position to the
  # label, minus that size, is encoded as a byte, word or doubleword
  # respectively. The encoded bytes overwrite the stream starting at that
  # same slot, so the marker itself is replaced.
  #
  # Raises RuntimeError for any other marker value.
  def plant
    self.position = machine.stream.size

    positions.each do |site|
      width = machine.stream[site + 1]
      patch = []

      case width
      when 2 then patch.push_B(position - site - 2)
      when 3 then patch.push_W(position - site - 3)
      when 5 then patch.push_D(position - site - 5)
      else
        raise "unhandled size #{width}"
      end

      patch.each_with_index do |byte, i|
        machine.stream[site + i + 1] = byte
      end
    end
  end

  # True until a position has been assigned.
  def future_label?
    position.nil?
  end

  # Records another stream position to be patched.
  def add(aPosition)
    positions << aPosition
  end
end
1120
+
1121
+ ##
1122
+ # SpecialRegister is the abstract implementation of any kind of
1123
+ # register that isn't a general register, eg: segment registers, mmx
1124
+ # registers, fpu registers, etc...
1125
+
1126
class SpecialRegister < Operand
  # Register number.
  attr_accessor :id

  # Builds a register of the receiving class on +machine+ with number
  # +id+.
  def self.on_id(machine, id)
    reg = new
    reg.machine = machine
    reg.id = id
    reg
  end

  def special_register?; true; end
end
1140
+
1141
+ ##
1142
+ # DebugRegister is an X86 DRx register
1143
+
1144
# Adds nothing of its own; everything is inherited from SpecialRegister.
class DebugRegister < SpecialRegister; end
1146
+
1147
+ ##
1148
+ # TestRegister is an X86 Test Register, TRx
1149
+
1150
# Adds nothing of its own; everything is inherited from SpecialRegister.
class TestRegister < SpecialRegister; end
1152
+
1153
+ ##
1154
+ # FPURegister is an X86 fpureg, STx
1155
+
1156
class FPURegister < SpecialRegister
  # Unlike the other special registers, the id may be given directly to
  # the constructor. The inherited initializer is called with no
  # arguments.
  def initialize(id = nil)
    super()
    self.id = id
  end
end
1162
+
1163
+ ##
1164
+ # ControlRegister is an X86 CRx register
1165
+
1166
# Adds nothing of its own; everything is inherited from SpecialRegister.
class ControlRegister < SpecialRegister; end
1168
+
1169
+ ##
1170
+ # MMXRegister is an X86 MMX register
1171
+
1172
class MMXRegister < SpecialRegister
  # Appends a single byte to +stream+: 0b11 in the top two bits, the
  # spare register's id shifted into bits 3-5, and this register's id in
  # the low bits.
  def push_mod_rm_on(spareRegister, stream)
    stream << (0b11000000 + (spareRegister.id << 3) + id)
  end
end
1177
+
1178
+ ##
1179
+ # SegmentRegister is an X86 segment register, eg: ss, cs, ds, es...
1180
+
1181
# Adds nothing of its own; everything is inherited from SpecialRegister.
class SegmentRegister < SpecialRegister; end
1183
+ end # module Wilson
1184
+
1185
+ __END__
1186
+
1187
+ # The Netwide Assembler: NASM
1188
+
1189
+ # Previous Chapter <nasmdo10.html> | Contents <nasmdoc0.html> | Index
1190
+ # <nasmdoci.html>
1191
+
1192
+
1193
+ # Appendix A: Intel x86 Instruction Reference
1194
+
1195
+ # This appendix provides a complete list of the machine instructions which
1196
+ # NASM will assemble, and a short description of the function of each one.
1197
+
1198
+ # It is not intended to be exhaustive documentation on the fine details of
1199
+ # the instructions' function, such as which exceptions they can trigger:
1200
+ # for such documentation, you should go to Intel's Web site,
1201
+ # |http://www.intel.com/|.
1202
+
1203
+ # Instead, this appendix is intended primarily to provide documentation on
1204
+ # the way the instructions may be used within NASM. For example, looking
1205
+ # up |LOOP| will tell you that NASM allows |CX| or |ECX| to be specified
1206
+ # as an optional second argument to the |LOOP| instruction, to enforce
1207
+ # which of the two possible counter registers should be used if the
1208
+ # default is not the one desired.
1209
+
1210
+ # The instructions are not quite listed in alphabetical order, since
1211
+ # groups of instructions with similar functions are lumped together in the
1212
+ # same entry. Most of them don't move very far from their alphabetic
1213
+ # position because of this.
1214
+
1215
+
1216
+ # A.1 Key to Operand Specifications
1217
+
1218
+ # The instruction descriptions in this appendix specify their operands
1219
+ # using the following notation:
1220
+
1221
+ # * Registers: |reg8| denotes an 8-bit general purpose register,
1222
+ # |reg16| denotes a 16-bit general purpose register, and |reg32| a
1223
+ # 32-bit one. |fpureg| denotes one of the eight FPU stack registers,
1224
+ # |mmxreg| denotes one of the eight 64-bit MMX registers, and
1225
+ # |segreg| denotes a segment register. In addition, some registers
1226
+ # (such as |AL|, |DX| or |ECX|) may be specified explicitly.
1227
+ # * Immediate operands: |imm| denotes a generic immediate operand.
1228
+ # |imm8|, |imm16| and |imm32| are used when the operand is intended
1229
+ # to be a specific size. For some of these instructions, NASM needs
1230
+ # an explicit specifier: for example, |ADD ESP,16| could be
1231
+ # interpreted as either |ADD r/m32,imm32| or |ADD r/m32,imm8|. NASM
1232
+ # chooses the former by default, and so you must specify |ADD
1233
+ # ESP,BYTE 16| for the latter.
1234
+ # * Memory references: |mem| denotes a generic memory reference;
1235
+ # |mem8|, |mem16|, |mem32|, |mem64| and |mem80| are used when the
1236
+ # operand needs to be a specific size. Again, a specifier is needed
1237
+ # in some cases: |DEC [address]| is ambiguous and will be rejected
1238
+ # by NASM. You must specify |DEC BYTE [address]|, |DEC WORD
1239
+ # [address]| or |DEC DWORD [address]| instead.
1240
+ # * Restricted memory references: one form of the |MOV| instruction
1241
+ # allows a memory address to be specified /without/ allowing the
1242
+ # normal range of register combinations and effective address
1243
+ # processing. This is denoted by |memoffs8|, |memoffs16| and
1244
+ # |memoffs32|.
1245
+ # * Register or memory choices: many instructions can accept either a
1246
+ # register /or/ a memory reference as an operand. |r/m8| is a
1247
+ # shorthand for |reg8/mem8|; similarly |r/m16| and |r/m32|. |r/m64|
1248
+ # is MMX-related, and is a shorthand for |mmxreg/mem64|.
1249
+
1250
+
1251
+ # A.2 Key to Opcode Descriptions
1252
+
1253
+ # This appendix also provides the opcodes which NASM will generate for
1254
+ # each form of each instruction. The opcodes are listed in the following way:
1255
+
1256
+ # * A hex number, such as |3F|, indicates a fixed byte containing that
1257
+ # number.
1258
+ # * A hex number followed by |+r|, such as |C8+r|, indicates that one
1259
+ # of the operands to the instruction is a register, and the
1260
+ # `register value' of that register should be added to the hex
1261
+ # number to produce the generated byte. For example, EDX has
1262
+ # register value 2, so the code |C8+r|, when the register operand is
1263
+ # EDX, generates the hex byte |CA|. Register values for specific
1264
+ # registers are given in section A.2.1 <#section-A.2.1>.
1265
+ # * A hex number followed by |+cc|, such as |40+cc|, indicates that
1266
+ # the instruction name has a condition code suffix, and the numeric
1267
+ # representation of the condition code should be added to the hex
1268
+ # number to produce the generated byte. For example, the code
1269
+ # |40+cc|, when the instruction contains the |NE| condition,
1270
+ # generates the hex byte |45|. Condition codes and their numeric
1271
+ # representations are given in section A.2.2 <#section-A.2.2>.
1272
+ # * A slash followed by a digit, such as |/2|, indicates that one of
1273
+ # the operands to the instruction is a memory address or register
1274
+ # (denoted |mem| or |r/m|, with an optional size). This is to be
1275
+ # encoded as an effective address, with a ModR/M byte, an optional
1276
+ # SIB byte, and an optional displacement, and the spare (register)
1277
+ # field of the ModR/M byte should be the digit given (which will be
1278
+ # from 0 to 7, so it fits in three bits). The encoding of effective
1279
+ # addresses is given in section A.2.3 <#section-A.2.3>.
1280
+ # * The code |/r| combines the above two: it indicates that one of the
1281
+ # operands is a memory address or |r/m|, and another is a register,
1282
+ # and that an effective address should be generated with the spare
1283
+ # (register) field in the ModR/M byte being equal to the `register
1284
+ # value' of the register operand. The encoding of effective
1285
+ # addresses is given in section A.2.3 <#section-A.2.3>; register
1286
+ # values are given in section A.2.1 <#section-A.2.1>.
1287
+ # * The codes |ib|, |iw| and |id| indicate that one of the operands to
1288
+ # the instruction is an immediate value, and that this is to be
1289
+ # encoded as a byte, little-endian word or little-endian doubleword
1290
+ # respectively.
1291
+ # * The codes |rb|, |rw| and |rd| indicate that one of the operands to
1292
+ # the instruction is an immediate value, and that the /difference/
1293
+ # between this value and the address of the end of the instruction
1294
+ # is to be encoded as a byte, word or doubleword respectively. Where
1295
+ # the form |rw/rd| appears, it indicates that either |rw| or |rd|
1296
+ # should be used according to whether assembly is being performed in
1297
+ # |BITS 16| or |BITS 32| state respectively.
1298
+ # * The codes |ow| and |od| indicate that one of the operands to the
1299
+ # instruction is a reference to the contents of a memory address
1300
+ # specified as an immediate value: this encoding is used in some
1301
+ # forms of the |MOV| instruction in place of the standard
1302
+ # effective-address mechanism. The displacement is encoded as a word
1303
+ # or doubleword. Again, |ow/od| denotes that |ow| or |od| should be
1304
+ # chosen according to the |BITS| setting.
1305
+ # * The codes |o16| and |o32| indicate that the given form of the
1306
+ # instruction should be assembled with operand size 16 or 32 bits.
1307
+ # In other words, |o16| indicates a |66| prefix in |BITS 32| state,
1308
+ # but generates no code in |BITS 16| state; and |o32| indicates a
1309
+ # |66| prefix in |BITS 16| state but generates nothing in |BITS 32|.
1310
+ # * The codes |a16| and |a32|, similarly to |o16| and |o32|, indicate
1311
+ # the address size of the given form of the instruction. Where this
1312
+ # does not match the |BITS| setting, a |67| prefix is required.
1313
+
1314
+
1315
+ # A.2.1 Register Values
1316
+
1317
+ # Where an instruction requires a register value, it is already implicit
1318
+ # in the encoding of the rest of the instruction what type of register is
1319
+ # intended: an 8-bit general-purpose register, a segment register, a debug
1320
+ # register, an MMX register, or whatever. Therefore there is no problem
1321
+ # with registers of different types sharing an encoding value.
1322
+
1323
+ # The encodings for the various classes of register are:
1324
+
1325
+ # * 8-bit general registers: |AL| is 0, |CL| is 1, |DL| is 2, |BL| is
1326
+ # 3, |AH| is 4, |CH| is 5, |DH| is 6, and |BH| is 7.
1327
+ # * 16-bit general registers: |AX| is 0, |CX| is 1, |DX| is 2, |BX| is
1328
+ # 3, |SP| is 4, |BP| is 5, |SI| is 6, and |DI| is 7.
1329
+ # * 32-bit general registers: |EAX| is 0, |ECX| is 1, |EDX| is 2,
1330
+ # |EBX| is 3, |ESP| is 4, |EBP| is 5, |ESI| is 6, and |EDI| is 7.
1331
+ # * Segment registers: |ES| is 0, |CS| is 1, |SS| is 2, |DS| is 3,
1332
+ # |FS| is 4, and |GS| is 5.
1333
+ # * Floating-point registers: |ST0| is 0, |ST1| is 1, |ST2| is 2,
1334
+ # |ST3| is 3, |ST4| is 4, |ST5| is 5, |ST6| is 6, and |ST7| is 7.
1335
+ # * 64-bit MMX registers: |MM0| is 0, |MM1| is 1, |MM2| is 2, |MM3| is
1336
+ # 3, |MM4| is 4, |MM5| is 5, |MM6| is 6, and |MM7| is 7.
1337
+ # * Control registers: |CR0| is 0, |CR2| is 2, |CR3| is 3, and |CR4|
1338
+ # is 4.
1339
+ # * Debug registers: |DR0| is 0, |DR1| is 1, |DR2| is 2, |DR3| is 3,
1340
+ # |DR6| is 6, and |DR7| is 7.
1341
+ # * Test registers: |TR3| is 3, |TR4| is 4, |TR5| is 5, |TR6| is 6,
1342
+ # and |TR7| is 7.
1343
+
1344
+ # (Note that wherever a register name contains a number, that number is
1345
+ # also the register value for that register.)
1346
+
1347
+
1348
+ # A.2.2 Condition Codes
1349
+
1350
+ # The available condition codes are given here, along with their numeric
1351
+ # representations as part of opcodes. Many of these condition codes have
1352
+ # synonyms, so several will be listed at a time.
1353
+
1354
+ # In the following descriptions, the word `either', when applied to two
1355
+ # possible trigger conditions, is used to mean `either or both'. If
1356
+ # `either but not both' is meant, the phrase `exactly one of' is used.
1357
+
1358
+ # * |O| is 0 (trigger if the overflow flag is set); |NO| is 1.
1359
+ # * |B|, |C| and |NAE| are 2 (trigger if the carry flag is set); |AE|,
1360
+ # |NB| and |NC| are 3.
1361
+ # * |E| and |Z| are 4 (trigger if the zero flag is set); |NE| and |NZ|
1362
+ # are 5.
1363
+ # * |BE| and |NA| are 6 (trigger if either of the carry or zero flags
1364
+ # is set); |A| and |NBE| are 7.
1365
+ # * |S| is 8 (trigger if the sign flag is set); |NS| is 9.
1366
+ # * |P| and |PE| are 10 (trigger if the parity flag is set); |NP| and
1367
+ # |PO| are 11.
1368
+ # * |L| and |NGE| are 12 (trigger if exactly one of the sign and
1369
+ # overflow flags is set); |GE| and |NL| are 13.
1370
+ # * |LE| and |NG| are 14 (trigger if either the zero flag is set, or
1371
+ # exactly one of the sign and overflow flags is set); |G| and |NLE|
1372
+ # are 15.
1373
+
1374
+ # Note that in all cases, the sense of a condition code may be reversed by
1375
+ # changing the low bit of the numeric representation.
1376
+
1377
+
1378
+ # A.2.3 Effective Address Encoding: ModR/M and SIB
1379
+
1380
+ # An effective address is encoded in up to three parts: a ModR/M byte, an
1381
+ # optional SIB byte, and an optional byte, word or doubleword displacement
1382
+ # field.
1383
+
1384
+ # The ModR/M byte consists of three fields: the |mod| field, ranging from
1385
+ # 0 to 3, in the upper two bits of the byte, the |r/m| field, ranging from
1386
+ # 0 to 7, in the lower three bits, and the spare (register) field in the
1387
+ # middle (bit 3 to bit 5). The spare field is not relevant to the
1388
+ # effective address being encoded, and either contains an extension to the
1389
+ # instruction opcode or the register value of another operand.
1390
+
1391
+ # The ModR/M system can be used to encode a direct register reference
1392
+ # rather than a memory access. This is always done by setting the |mod|
1393
+ # field to 3 and the |r/m| field to the register value of the register in
1394
+ # question (it must be a general-purpose register, and the size of the
1395
+ # register must already be implicit in the encoding of the rest of the
1396
+ # instruction). In this case, the SIB byte and displacement field are both
1397
+ # absent.
1398
+
1399
+ # In 16-bit addressing mode (either |BITS 16| with no |67| prefix, or
1400
+ # |BITS 32| with a |67| prefix), the SIB byte is never used. The general
1401
+ # rules for |mod| and |r/m| (there is an exception, given below) are:
1402
+
1403
+ # * The |mod| field gives the length of the displacement field: 0
1404
+ # means no displacement, 1 means one byte, and 2 means two bytes.
1405
+ # * The |r/m| field encodes the combination of registers to be added
1406
+ # to the displacement to give the accessed address: 0 means |BX+SI|,
1407
+ # 1 means |BX+DI|, 2 means |BP+SI|, 3 means |BP+DI|, 4 means |SI|
1408
+ # only, 5 means |DI| only, 6 means |BP| only, and 7 means |BX| only.
1409
+
1410
+ # However, there is a special case:
1411
+
1412
+ # * If |mod| is 0 and |r/m| is 6, the effective address encoded is not
1413
+ # |[BP]| as the above rules would suggest, but instead |[disp16]|:
1414
+ # the displacement field is present and is two bytes long, and no
1415
+ # registers are added to the displacement.
1416
+
1417
+ # Therefore the effective address |[BP]| cannot be encoded as efficiently
1418
+ # as |[BX]|; so if you code |[BP]| in a program, NASM adds a notional
1419
+ # 8-bit zero displacement, and sets |mod| to 1, |r/m| to 6, and the
1420
+ # one-byte displacement field to 0.
1421
+
1422
+ # In 32-bit addressing mode (either |BITS 16| with a |67| prefix, or |BITS
1423
+ # 32| with no |67| prefix) the general rules (again, there are exceptions)
1424
+ # for |mod| and |r/m| are:
1425
+
1426
+ # * The |mod| field gives the length of the displacement field: 0
1427
+ # means no displacement, 1 means one byte, and 2 means four bytes.
1428
+ # * If only one register is to be added to the displacement, and it is
1429
+ # not |ESP|, the |r/m| field gives its register value, and the SIB
1430
+ # byte is absent. If the |r/m| field is 4 (which would encode
1431
+ # |ESP|), the SIB byte is present and gives the combination and
1432
+ # scaling of registers to be added to the displacement.
1433
+
1434
+ # If the SIB byte is present, it describes the combination of registers
1435
+ # (an optional base register, and an optional index register scaled by
1436
+ # multiplication by 1, 2, 4 or 8) to be added to the displacement. The SIB
1437
+ # byte is divided into the |scale| field, in the top two bits, the |index|
1438
+ # field in the next three, and the |base| field in the bottom three. The
1439
+ # general rules are:
1440
+
1441
+ # * The |base| field encodes the register value of the base register.
1442
+ # * The |index| field encodes the register value of the index
1443
+ # register, unless it is 4, in which case no index register is used
1444
+ # (so |ESP| cannot be used as an index register).
1445
+ # * The |scale| field encodes the multiplier by which the index
1446
+ # register is scaled before adding it to the base and displacement:
1447
+ # 0 encodes a multiplier of 1, 1 encodes 2, 2 encodes 4 and 3
1448
+ # encodes 8.
1449
+
1450
+ # The exceptions to the 32-bit encoding rules are:
1451
+
1452
+ # * If |mod| is 0 and |r/m| is 5, the effective address encoded is not
1453
+ # |[EBP]| as the above rules would suggest, but instead |[disp32]|:
1454
+ # the displacement field is present and is four bytes long, and no
1455
+ # registers are added to the displacement.
1456
+ # * If |mod| is 0, |r/m| is 4 (meaning the SIB byte is present) and
1457
+ # |base| is 5, the effective address encoded is not |[EBP+index]| as
1458
+ # the above rules would suggest, but instead |[disp32+index]|: the
1459
+ # displacement field is present and is four bytes long, and there is
1460
+ # no base register (but the index register is still processed in the
1461
+ # normal way).
1462
+
1463
+
1464
+ # A.3 Key to Instruction Flags
1465
+
1466
+ # Given along with each instruction in this appendix is a set of flags,
1467
+ # denoting the type of the instruction. The types are as follows:
1468
+
1469
+ # * |8086|, |186|, |286|, |386|, |486|, |PENT| and |P6| denote the
1470
+ # lowest processor type that supports the instruction. Most
1471
+ # instructions run on all processors above the given type; those
1472
+ # that do not are documented. The Pentium II contains no additional
1473
+ # instructions beyond the P6 (Pentium Pro); from the point of view
1474
+ # of its instruction set, it can be thought of as a P6 with MMX
1475
+ # capability.
1476
+ # * |CYRIX| indicates that the instruction is specific to Cyrix
1477
+ # processors, for example the extra MMX instructions in the Cyrix
1478
+ # extended MMX instruction set.
1479
+ # * |FPU| indicates that the instruction is a floating-point one, and
1480
+ # will only run on machines with a coprocessor (automatically
1481
+ # including 486DX, Pentium and above).
1482
+ # * |MMX| indicates that the instruction is an MMX one, and will run
1483
+ # on MMX-capable Pentium processors and the Pentium II.
1484
+ # * |PRIV| indicates that the instruction is a protected-mode
1485
+ # management instruction. Many of these may only be used in
1486
+ # protected mode, or only at privilege level zero.
1487
+ # * |UNDOC| indicates that the instruction is an undocumented one, and
1488
+ # not part of the official Intel Architecture; it may or may not be
1489
+ # supported on any given machine.
1490
+
1491
+
1492
+ # A.4 |AAA|, |AAS|, |AAM|, |AAD|: ASCII Adjustments
1493
+
1494
+ # AAA ; 37 [8086]
1495
+
1496
+ # AAS ; 3F [8086]
1497
+
1498
+ # AAD ; D5 0A [8086]
1499
+ # AAD imm ; D5 ib [8086]
1500
+
1501
+ # AAM ; D4 0A [8086]
1502
+ # AAM imm ; D4 ib [8086]
1503
+
1504
+ # These instructions are used in conjunction with the add, subtract,
1505
+ # multiply and divide instructions to perform binary-coded decimal
1506
+ # arithmetic in /unpacked/ (one BCD digit per byte - easy to translate to
1507
+ # and from ASCII, hence the instruction names) form. There are also packed
1508
+ # BCD instructions |DAA| and |DAS|: see section A.23 <#section-A.23>.
1509
+
1510
+ # |AAA| should be used after a one-byte |ADD| instruction whose
1511
+ # destination was the |AL| register: by means of examining the value in
1512
+ # the low nibble of |AL| and also the auxiliary carry flag |AF|, it
1513
+ # determines whether the addition has overflowed, and adjusts it (and sets
1514
+ # the carry flag) if so. You can add long BCD strings together by doing
1515
+ # |ADD|/|AAA| on the low digits, then doing |ADC|/|AAA| on each subsequent
1516
+ # digit.
1517
+
1518
+ # |AAS| works similarly to |AAA|, but is for use after |SUB| instructions
1519
+ # rather than |ADD|.
1520
+
1521
+ # |AAM| is for use after you have multiplied two decimal digits together
1522
+ # and left the result in |AL|: it divides |AL| by ten and stores the
1523
+ # quotient in |AH|, leaving the remainder in |AL|. The divisor 10 can be
1524
+ # changed by specifying an operand to the instruction: a particularly
1525
+ # handy use of this is |AAM 16|, causing the two nibbles in |AL| to be
1526
+ # separated into |AH| and |AL|.
1527
+
1528
+ # |AAD| performs the inverse operation to |AAM|: it multiplies |AH| by
1529
+ # ten, adds it to |AL|, and sets |AH| to zero. Again, the multiplier 10
1530
+ # can be changed.
1531
+
1532
+
1533
+ # A.5 |ADC|: Add with Carry
1534
+
1535
+ # ADC r/m8,reg8 ; 10 /r [8086]
1536
+ # ADC r/m16,reg16 ; o16 11 /r [8086]
1537
+ # ADC r/m32,reg32 ; o32 11 /r [386]
1538
+
1539
+ # ADC reg8,r/m8 ; 12 /r [8086]
1540
+ # ADC reg16,r/m16 ; o16 13 /r [8086]
1541
+ # ADC reg32,r/m32 ; o32 13 /r [386]
1542
+
1543
+ # ADC r/m8,imm8 ; 80 /2 ib [8086]
1544
+ # ADC r/m16,imm16 ; o16 81 /2 iw [8086]
1545
+ # ADC r/m32,imm32 ; o32 81 /2 id [386]
1546
+
1547
+ # ADC r/m16,imm8 ; o16 83 /2 ib [8086]
1548
+ # ADC r/m32,imm8 ; o32 83 /2 ib [386]
1549
+
1550
+ # ADC AL,imm8 ; 14 ib [8086]
1551
+ # ADC AX,imm16 ; o16 15 iw [8086]
1552
+ # ADC EAX,imm32 ; o32 15 id [386]
1553
+
1554
+ # |ADC| performs integer addition: it adds its two operands together, plus
1555
+ # the value of the carry flag, and leaves the result in its destination
1556
+ # (first) operand. The flags are set according to the result of the
1557
+ # operation: in particular, the carry flag is affected and can be used by
1558
+ # a subsequent |ADC| instruction.
1559
+
1560
+ # In the forms with an 8-bit immediate second operand and a longer first
1561
+ # operand, the second operand is considered to be signed, and is
1562
+ # sign-extended to the length of the first operand. In these cases, the
1563
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
1564
+ # instruction.
1565
+
1566
+ # To add two numbers without also adding the contents of the carry flag,
1567
+ # use |ADD| (section A.6 <#section-A.6>).
1568
+
1569
+
1570
+ # A.6 |ADD|: Add Integers
1571
+
1572
+ # ADD r/m8,reg8 ; 00 /r [8086]
1573
+ # ADD r/m16,reg16 ; o16 01 /r [8086]
1574
+ # ADD r/m32,reg32 ; o32 01 /r [386]
1575
+
1576
+ # ADD reg8,r/m8 ; 02 /r [8086]
1577
+ # ADD reg16,r/m16 ; o16 03 /r [8086]
1578
+ # ADD reg32,r/m32 ; o32 03 /r [386]
1579
+
1580
+ # ADD r/m8,imm8 ; 80 /0 ib [8086]
1581
+ # ADD r/m16,imm16 ; o16 81 /0 iw [8086]
1582
+ # ADD r/m32,imm32 ; o32 81 /0 id [386]
1583
+
1584
+ # ADD r/m16,imm8 ; o16 83 /0 ib [8086]
1585
+ # ADD r/m32,imm8 ; o32 83 /0 ib [386]
1586
+
1587
+ # ADD AL,imm8 ; 04 ib [8086]
1588
+ # ADD AX,imm16 ; o16 05 iw [8086]
1589
+ # ADD EAX,imm32 ; o32 05 id [386]
1590
+
1591
+ # |ADD| performs integer addition: it adds its two operands together, and
1592
+ # leaves the result in its destination (first) operand. The flags are set
1593
+ # according to the result of the operation: in particular, the carry flag
1594
+ # is affected and can be used by a subsequent |ADC| instruction (section
1595
+ # A.5 <#section-A.5>).
1596
+
1597
+ # In the forms with an 8-bit immediate second operand and a longer first
1598
+ # operand, the second operand is considered to be signed, and is
1599
+ # sign-extended to the length of the first operand. In these cases, the
1600
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
1601
+ # instruction.
1602
+
1603
+
1604
+ # A.7 |AND|: Bitwise AND
1605
+
1606
+ # AND r/m8,reg8 ; 20 /r [8086]
1607
+ # AND r/m16,reg16 ; o16 21 /r [8086]
1608
+ # AND r/m32,reg32 ; o32 21 /r [386]
1609
+
1610
+ # AND reg8,r/m8 ; 22 /r [8086]
1611
+ # AND reg16,r/m16 ; o16 23 /r [8086]
1612
+ # AND reg32,r/m32 ; o32 23 /r [386]
1613
+
1614
+ # AND r/m8,imm8 ; 80 /4 ib [8086]
1615
+ # AND r/m16,imm16 ; o16 81 /4 iw [8086]
1616
+ # AND r/m32,imm32 ; o32 81 /4 id [386]
1617
+
1618
+ # AND r/m16,imm8 ; o16 83 /4 ib [8086]
1619
+ # AND r/m32,imm8 ; o32 83 /4 ib [386]
1620
+
1621
+ # AND AL,imm8 ; 24 ib [8086]
1622
+ # AND AX,imm16 ; o16 25 iw [8086]
1623
+ # AND EAX,imm32 ; o32 25 id [386]
1624
+
1625
+ # |AND| performs a bitwise AND operation between its two operands (i.e.
1626
+ # each bit of the result is 1 if and only if the corresponding bits of the
1627
+ # two inputs were both 1), and stores the result in the destination
1628
+ # (first) operand.
1629
+
1630
+ # In the forms with an 8-bit immediate second operand and a longer first
1631
+ # operand, the second operand is considered to be signed, and is
1632
+ # sign-extended to the length of the first operand. In these cases, the
1633
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
1634
+ # instruction.
1635
+
1636
+ # The MMX instruction |PAND| (see section A.116 <#section-A.116>) performs
1637
+ # the same operation on the 64-bit MMX registers.
1638
+
1639
+
1640
+ # A.8 |ARPL|: Adjust RPL Field of Selector
1641
+
1642
+ # ARPL r/m16,reg16 ; 63 /r [286,PRIV]
1643
+
1644
+ # |ARPL| expects its two word operands to be segment selectors. It adjusts
1645
+ # the RPL (requested privilege level - stored in the bottom two bits of
1646
+ # the selector) field of the destination (first) operand to ensure that it
1647
+ # is no less (i.e. no more privileged than) the RPL field of the source
1648
+ # operand. The zero flag is set if and only if a change had to be made.
1649
+
1650
+
1651
+ # A.9 |BOUND|: Check Array Index against Bounds
1652
+
1653
+ # BOUND reg16,mem ; o16 62 /r [186]
1654
+ # BOUND reg32,mem ; o32 62 /r [386]
1655
+
1656
+ # |BOUND| expects its second operand to point to an area of memory
1657
+ # containing two signed values of the same size as its first operand (i.e.
1658
+ # two words for the 16-bit form; two doublewords for the 32-bit form). It
1659
+ # performs two signed comparisons: if the value in the register passed as
1660
+ # its first operand is less than the first of the in-memory values, or is
1661
+ # greater than the second, it throws a BR exception.
1662
+ # Otherwise, it does nothing.
1663
+
1664
+
1665
+ # A.10 |BSF|, |BSR|: Bit Scan
1666
+
1667
+ # BSF reg16,r/m16 ; o16 0F BC /r [386]
1668
+ # BSF reg32,r/m32 ; o32 0F BC /r [386]
1669
+
1670
+ # BSR reg16,r/m16 ; o16 0F BD /r [386]
1671
+ # BSR reg32,r/m32 ; o32 0F BD /r [386]
1672
+
1673
+ # |BSF| searches for a set bit in its source (second) operand, starting
1674
+ # from the bottom, and if it finds one, stores the index in its
1675
+ # destination (first) operand. If no set bit is found, the contents of the
1676
+ # destination operand are undefined.
1677
+
1678
+ # |BSR| performs the same function, but searches from the top instead, so
1679
+ # it finds the most significant set bit.
1680
+
1681
+ # Bit indices are from 0 (least significant) to 15 or 31 (most significant).
1682
+
1683
+
1684
+ # A.11 |BSWAP|: Byte Swap
1685
+
1686
+ # BSWAP reg32 ; o32 0F C8+r [486]
1687
+
1688
+ # |BSWAP| swaps the order of the four bytes of a 32-bit register: bits 0-7
1689
+ # exchange places with bits 24-31, and bits 8-15 swap with bits 16-23.
1690
+ # There is no explicit 16-bit equivalent: to byte-swap |AX|, |BX|, |CX| or
1691
+ # |DX|, |XCHG| can be used.
1692
+
1693
+
1694
+ # A.12 |BT|, |BTC|, |BTR|, |BTS|: Bit Test
1695
+
1696
+ # BT r/m16,reg16 ; o16 0F A3 /r [386]
1697
+ # BT r/m32,reg32 ; o32 0F A3 /r [386]
1698
+ # BT r/m16,imm8 ; o16 0F BA /4 ib [386]
1699
+ # BT r/m32,imm8 ; o32 0F BA /4 ib [386]
1700
+
1701
+ # BTC r/m16,reg16 ; o16 0F BB /r [386]
1702
+ # BTC r/m32,reg32 ; o32 0F BB /r [386]
1703
+ # BTC r/m16,imm8 ; o16 0F BA /7 ib [386]
1704
+ # BTC r/m32,imm8 ; o32 0F BA /7 ib [386]
1705
+
1706
+ # BTR r/m16,reg16 ; o16 0F B3 /r [386]
1707
+ # BTR r/m32,reg32 ; o32 0F B3 /r [386]
1708
+ # BTR r/m16,imm8 ; o16 0F BA /6 ib [386]
1709
+ # BTR r/m32,imm8 ; o32 0F BA /6 ib [386]
1710
+
1711
+ # BTS r/m16,reg16 ; o16 0F AB /r [386]
1712
+ # BTS r/m32,reg32 ; o32 0F AB /r [386]
1713
+ # BTS r/m16,imm ; o16 0F BA /5 ib [386]
1714
+ # BTS r/m32,imm ; o32 0F BA /5 ib [386]
1715
+
1716
+ # These instructions all test one bit of their first operand, whose index
1717
+ # is given by the second operand, and store the value of that bit into the
1718
+ # carry flag. Bit indices are from 0 (least significant) to 15 or 31 (most
1719
+ # significant).
1720
+
1721
+ # In addition to storing the original value of the bit into the carry
1722
+ # flag, |BTR| also resets (clears) the bit in the operand itself. |BTS|
1723
+ # sets the bit, and |BTC| complements the bit. |BT| does not modify its
1724
+ # operands.
1725
+
1726
+ # The bit offset should be no greater than the size of the operand.
1727
+
1728
+
1729
+ # A.13 |CALL|: Call Subroutine
1730
+
1731
+ # CALL imm ; E8 rw/rd [8086]
1732
+ # CALL imm:imm16 ; o16 9A iw iw [8086]
1733
+ # CALL imm:imm32 ; o32 9A id iw [386]
1734
+ # CALL FAR mem16 ; o16 FF /3 [8086]
1735
+ # CALL FAR mem32 ; o32 FF /3 [386]
1736
+ # CALL r/m16 ; o16 FF /2 [8086]
1737
+ # CALL r/m32 ; o32 FF /2 [386]
1738
+
1739
+ # |CALL| calls a subroutine, by means of pushing the current instruction
1740
+ # pointer (|IP|) and optionally |CS| as well on the stack, and then
1741
+ # jumping to a given address.
1742
+
1743
+ # |CS| is pushed as well as |IP| if and only if the call is a far call,
1744
+ # i.e. a destination segment address is specified in the instruction. The
1745
+ # forms involving two colon-separated arguments are far calls; so are the
1746
+ # |CALL FAR mem| forms.
1747
+
1748
+ # You can choose between the two immediate far call forms (|CALL imm:imm|)
1749
+ # by the use of the |WORD| and |DWORD| keywords: |CALL WORD
1750
+ # 0x1234:0x5678| or |CALL DWORD 0x1234:0x56789abc|.
1751
+
1752
+ # The |CALL FAR mem| forms execute a far call by loading the destination
1753
+ # address out of memory. The address loaded consists of 16 or 32 bits of
1754
+ # offset (depending on the operand size), and 16 bits of segment. The
1755
+ # operand size may be overridden using |CALL WORD FAR mem| or |CALL DWORD
1756
+ # FAR mem|.
1757
+
1758
+ # The |CALL r/m| forms execute a near call (within the same segment),
1759
+ # loading the destination address out of memory or out of a register. The
1760
+ # keyword |NEAR| may be specified, for clarity, in these forms, but is not
1761
+ # necessary. Again, operand size can be overridden using |CALL WORD mem|
1762
+ # or |CALL DWORD mem|.
1763
+
1764
+ # As a convenience, NASM does not require you to call a far procedure
1765
+ # symbol by coding the cumbersome |CALL SEG routine:routine|, but instead
1766
+ # allows the easier synonym |CALL FAR routine|.
1767
+
1768
+ # The |CALL r/m| forms given above are near calls; NASM will accept the
1769
+ # |NEAR| keyword (e.g. |CALL NEAR [address]|), even though it is not
1770
+ # strictly necessary.
1771
+
1772
+
1773
+ # A.14 |CBW|, |CWD|, |CDQ|, |CWDE|: Sign Extensions
1774
+
1775
+ # CBW ; o16 98 [8086]
1776
+ # CWD ; o16 99 [8086]
1777
+ # CDQ ; o32 99 [386]
1778
+ # CWDE ; o32 98 [386]
1779
+
1780
+ # All these instructions sign-extend a short value into a longer one, by
1781
+ # replicating the top bit of the original value to fill the extended one.
1782
+
1783
+ # |CBW| extends |AL| into |AX| by repeating the top bit of |AL| in every
1784
+ # bit of |AH|. |CWD| extends |AX| into |DX:AX| by repeating the top bit of
1785
+ # |AX| throughout |DX|. |CWDE| extends |AX| into |EAX|, and |CDQ| extends
1786
+ # |EAX| into |EDX:EAX|.
1787
+
1788
+
1789
+ # A.15 |CLC|, |CLD|, |CLI|, |CLTS|: Clear Flags
1790
+
1791
+ # CLC ; F8 [8086]
1792
+ # CLD ; FC [8086]
1793
+ # CLI ; FA [8086]
1794
+ # CLTS ; 0F 06 [286,PRIV]
1795
+
1796
+ # These instructions clear various flags. |CLC| clears the carry flag;
1797
+ # |CLD| clears the direction flag; |CLI| clears the interrupt flag (thus
1798
+ # disabling interrupts); and |CLTS| clears the task-switched (|TS|) flag
1799
+ # in |CR0|.
1800
+
1801
+ # To set the carry, direction, or interrupt flags, use the |STC|, |STD|
1802
+ # and |STI| instructions (section A.156 <#section-A.156>). To invert the
1803
+ # carry flag, use |CMC| (section A.16 <#section-A.16>).
1804
+
1805
+
1806
+ # A.16 |CMC|: Complement Carry Flag
1807
+
1808
+ # CMC ; F5 [8086]
1809
+
1810
+ # |CMC| changes the value of the carry flag: if it was 0, it sets it to 1,
1811
+ # and vice versa.
1812
+
1813
+
1814
+ # A.17 |CMOVcc|: Conditional Move
1815
+
1816
+ # CMOVcc reg16,r/m16 ; o16 0F 40+cc /r [P6]
1817
+ # CMOVcc reg32,r/m32 ; o32 0F 40+cc /r [P6]
1818
+
1819
+ # |CMOV| moves its source (second) operand into its destination (first)
1820
+ # operand if the given condition code is satisfied; otherwise it does
1821
+ # nothing.
1822
+
1823
+ # For a list of condition codes, see section A.2.2 <#section-A.2.2>.
1824
+
1825
+ # Although the |CMOV| instructions are flagged |P6| above, they may not be
1826
+ # supported by all Pentium Pro processors; the |CPUID| instruction
1827
+ # (section A.22 <#section-A.22>) will return a bit which indicates whether
1828
+ # conditional moves are supported.
1829
+
1830
+
1831
+ # A.18 |CMP|: Compare Integers
1832
+
1833
+ # CMP r/m8,reg8 ; 38 /r [8086]
1834
+ # CMP r/m16,reg16 ; o16 39 /r [8086]
1835
+ # CMP r/m32,reg32 ; o32 39 /r [386]
1836
+
1837
+ # CMP reg8,r/m8 ; 3A /r [8086]
1838
+ # CMP reg16,r/m16 ; o16 3B /r [8086]
1839
+ # CMP reg32,r/m32 ; o32 3B /r [386]
1840
+
1841
+ # CMP r/m8,imm8 ; 80 /7 ib [8086]
1842
+ # CMP r/m16,imm16 ; o16 81 /7 iw [8086]
1843
+ # CMP r/m32,imm32 ; o32 81 /7 id [386]
1844
+
1845
+ # CMP r/m16,imm8 ; o16 83 /7 ib [8086]
1846
+ # CMP r/m32,imm8 ; o32 83 /7 ib [386]
1847
+
1848
+ # CMP AL,imm8 ; 3C ib [8086]
1849
+ # CMP AX,imm16 ; o16 3D iw [8086]
1850
+ # CMP EAX,imm32 ; o32 3D id [386]
1851
+
1852
+ # |CMP| performs a `mental' subtraction of its second operand from its
1853
+ # first operand, and affects the flags as if the subtraction had taken
1854
+ # place, but does not store the result of the subtraction anywhere.
1855
+
1856
+ # In the forms with an 8-bit immediate second operand and a longer first
1857
+ # operand, the second operand is considered to be signed, and is
1858
+ # sign-extended to the length of the first operand. In these cases, the
1859
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
1860
+ # instruction.
1861
+
1862
+
1863
+ # A.19 |CMPSB|, |CMPSW|, |CMPSD|: Compare Strings
1864
+
1865
+ # CMPSB ; A6 [8086]
1866
+ # CMPSW ; o16 A7 [8086]
1867
+ # CMPSD ; o32 A7 [386]
1868
+
1869
+ # |CMPSB| compares the byte at |[DS:SI]| or |[DS:ESI]| with the byte at
1870
+ # |[ES:DI]| or |[ES:EDI]|, and sets the flags accordingly. It then
1871
+ # increments or decrements (depending on the direction flag: increments if
1872
+ # the flag is clear, decrements if it is set) |SI| and |DI| (or |ESI| and
1873
+ # |EDI|).
1874
+
1875
+ # The registers used are |SI| and |DI| if the address size is 16 bits, and
1876
+ # |ESI| and |EDI| if it is 32 bits. If you need to use an address size not
1877
+ # equal to the current |BITS| setting, you can use an explicit |a16| or
1878
+ # |a32| prefix.
1879
+
1880
+ # The segment register used to load from |[SI]| or |[ESI]| can be
1881
+ # overridden by using a segment register name as a prefix (for example,
1882
+ # |es cmpsb|). The use of |ES| for the load from |[DI]| or |[EDI]| cannot
1883
+ # be overridden.
1884
+
1885
+ # |CMPSW| and |CMPSD| work in the same way, but they compare a word or a
1886
+ # doubleword instead of a byte, and increment or decrement the addressing
1887
+ # registers by 2 or 4 instead of 1.
1888
+
1889
+ # The |REPE| and |REPNE| prefixes (equivalently, |REPZ| and |REPNZ|) may
1890
+ # be used to repeat the instruction up to |CX| (or |ECX| - again, the
1891
+ # address size chooses which) times until the first unequal or equal byte
1892
+ # is found.
1893
+
1894
+
1895
+ # A.20 |CMPXCHG|, |CMPXCHG486|: Compare and Exchange
1896
+
1897
+ # CMPXCHG r/m8,reg8 ; 0F B0 /r [PENT]
1898
+ # CMPXCHG r/m16,reg16 ; o16 0F B1 /r [PENT]
1899
+ # CMPXCHG r/m32,reg32 ; o32 0F B1 /r [PENT]
1900
+
1901
+ # CMPXCHG486 r/m8,reg8 ; 0F A6 /r [486,UNDOC]
1902
+ # CMPXCHG486 r/m16,reg16 ; o16 0F A7 /r [486,UNDOC]
1903
+ # CMPXCHG486 r/m32,reg32 ; o32 0F A7 /r [486,UNDOC]
1904
+
1905
+ # These two instructions perform exactly the same operation; however,
1906
+ # apparently some (not all) 486 processors support it under a non-standard
1907
+ # opcode, so NASM provides the undocumented |CMPXCHG486| form to generate
1908
+ # the non-standard opcode.
1909
+
1910
+ # |CMPXCHG| compares its destination (first) operand to the value in |AL|,
1911
+ # |AX| or |EAX| (depending on the size of the instruction). If they are
1912
+ # equal, it copies its source (second) operand into the destination and
1913
+ # sets the zero flag. Otherwise, it clears the zero flag and leaves the
1914
+ # destination alone.
1915
+
1916
+ # |CMPXCHG| is intended to be used for atomic operations in multitasking
1917
+ # or multiprocessor environments. To safely update a value in shared
1918
+ # memory, for example, you might load the value into |EAX|, load the
1919
+ # updated value into |EBX|, and then execute the instruction |lock cmpxchg
1920
+ # [value],ebx|. If |value| has not changed since being loaded, it is
1921
+ # updated with your desired new value, and the zero flag is set to let you
1922
+ # know it has worked. (The |LOCK| prefix prevents another processor doing
1923
+ # anything in the middle of this operation: it guarantees atomicity.)
1924
+ # However, if another processor has modified the value in between your
1925
+ # load and your attempted store, the store does not happen, and you are
1926
+ # notified of the failure by a cleared zero flag, so you can go round and
1927
+ # try again.
1928
+
1929
+
1930
+ # A.21 |CMPXCHG8B|: Compare and Exchange Eight Bytes
1931
+
1932
+ # CMPXCHG8B mem ; 0F C7 /1 [PENT]
1933
+
1934
+ # This is a larger and more unwieldy version of |CMPXCHG|: it compares the
1935
+ # 64-bit (eight-byte) value stored at |[mem]| with the value in |EDX:EAX|.
1936
+ # If they are equal, it sets the zero flag and stores |ECX:EBX| into the
1937
+ # memory area. If they are unequal, it clears the zero flag and leaves the
1938
+ # memory area untouched.
1939
+
1940
+
1941
+ # A.22 |CPUID|: Get CPU Identification Code
1942
+
1943
+ # CPUID ; 0F A2 [PENT]
1944
+
1945
+ # |CPUID| returns various information about the processor it is being
1946
+ # executed on. It fills the four registers |EAX|, |EBX|, |ECX| and |EDX|
1947
+ # with information, which varies depending on the input contents of |EAX|.
1948
+
1949
+ # |CPUID| also acts as a barrier to serialise instruction execution:
1950
+ # executing the |CPUID| instruction guarantees that all the effects
1951
+ # (memory modification, flag modification, register modification) of
1952
+ # previous instructions have been completed before the next instruction
1953
+ # gets fetched.
1954
+
1955
+ # The information returned is as follows:
1956
+
1957
+ # * If |EAX| is zero on input, |EAX| on output holds the maximum
1958
+ # acceptable input value of |EAX|, and |EBX:EDX:ECX| contain the
1959
+ # string |"GenuineIntel"| (or not, if you have a clone processor).
1960
+ # That is to say, |EBX| contains |"Genu"| (in NASM's own sense of
1961
+ # character constants, described in section 3.4.2
1962
+ # <nasmdoc3.html#section-3.4.2>), |EDX| contains |"ineI"| and |ECX|
1963
+ # contains |"ntel"|.
1964
+ # * If |EAX| is one on input, |EAX| on output contains version
1965
+ # information about the processor, and |EDX| contains a set of
1966
+ # feature flags, showing the presence and absence of various
1967
+ # features. For example, bit 8 is set if the |CMPXCHG8B| instruction
1968
+ # (section A.21 <#section-A.21>) is supported, bit 15 is set if the
1969
+ # conditional move instructions (section A.17 <#section-A.17> and
1970
+ # section A.34 <#section-A.34>) are supported, and bit 23 is set if
1971
+ # MMX instructions are supported.
1972
+ # * If |EAX| is two on input, |EAX|, |EBX|, |ECX| and |EDX| all
1973
+ # contain information about caches and TLBs (Translation Lookaside
1974
+ # Buffers).
1975
+
1976
+ # For more information on the data returned from |CPUID|, see the
1977
+ # documentation on Intel's web site.
1978
+
1979
+
1980
+ # A.23 |DAA|, |DAS|: Decimal Adjustments
1981
+
1982
+ # DAA ; 27 [8086]
1983
+ # DAS ; 2F [8086]
1984
+
1985
+ # These instructions are used in conjunction with the add and subtract
1986
+ # instructions to perform binary-coded decimal arithmetic in /packed/ (one
1987
+ # BCD digit per nibble) form. For the unpacked equivalents, see section
1988
+ # A.4 <#section-A.4>.
1989
+
1990
+ # |DAA| should be used after a one-byte |ADD| instruction whose
1991
+ # destination was the |AL| register: by means of examining the value in
1992
+ # the |AL| and also the auxiliary carry flag |AF|, it determines whether
1993
+ # either digit of the addition has overflowed, and adjusts it (and sets
1994
+ # the carry and auxiliary-carry flags) if so. You can add long BCD strings
1995
+ # together by doing |ADD|/|DAA| on the low two digits, then doing
1996
+ # |ADC|/|DAA| on each subsequent pair of digits.
1997
+
1998
+ # |DAS| works similarly to |DAA|, but is for use after |SUB| instructions
1999
+ # rather than |ADD|.
2000
+
2001
+
2002
+ # A.24 |DEC|: Decrement Integer
2003
+
2004
+ # DEC reg16 ; o16 48+r [8086]
2005
+ # DEC reg32 ; o32 48+r [386]
2006
+ # DEC r/m8 ; FE /1 [8086]
2007
+ # DEC r/m16 ; o16 FF /1 [8086]
2008
+ # DEC r/m32 ; o32 FF /1 [386]
2009
+
2010
+ # |DEC| subtracts 1 from its operand. It does /not/ affect the carry flag:
2011
+ # to affect the carry flag, use |SUB something,1| (see section A.159
2012
+ # <#section-A.159>). See also |INC| (section A.79 <#section-A.79>).
2013
+
2014
+
2015
+ # A.25 |DIV|: Unsigned Integer Divide
2016
+
2017
+ # DIV r/m8 ; F6 /6 [8086]
2018
+ # DIV r/m16 ; o16 F7 /6 [8086]
2019
+ # DIV r/m32 ; o32 F7 /6 [386]
2020
+
2021
+ # |DIV| performs unsigned integer division. The explicit operand provided
2022
+ # is the divisor; the dividend and destination operands are implicit, in
2023
+ # the following way:
2024
+
2025
+ # * For |DIV r/m8|, |AX| is divided by the given operand; the quotient
2026
+ # is stored in |AL| and the remainder in |AH|.
2027
+ # * For |DIV r/m16|, |DX:AX| is divided by the given operand; the
2028
+ # quotient is stored in |AX| and the remainder in |DX|.
2029
+ # * For |DIV r/m32|, |EDX:EAX| is divided by the given operand; the
2030
+ # quotient is stored in |EAX| and the remainder in |EDX|.
2031
+
2032
+ # Signed integer division is performed by the |IDIV| instruction: see
2033
+ # section A.76 <#section-A.76>.
2034
+
2035
+
2036
+ # A.26 |EMMS|: Empty MMX State
2037
+
2038
+ # EMMS ; 0F 77 [PENT,MMX]
2039
+
2040
+ # |EMMS| sets the FPU tag word (marking which floating-point registers are
2041
+ # available) to all ones, meaning all registers are available for the FPU
2042
+ # to use. It should be used after executing MMX instructions and before
2043
+ # executing any subsequent floating-point operations.
2044
+
2045
+
2046
+ # A.27 |ENTER|: Create Stack Frame
2047
+
2048
+ # ENTER imm,imm ; C8 iw ib [186]
2049
+
2050
+ # |ENTER| constructs a stack frame for a high-level language procedure
2051
+ # call. The first operand (the |iw| in the opcode definition above refers
2052
+ # to the first operand) gives the amount of stack space to allocate for
2053
+ # local variables; the second (the |ib| above) gives the nesting level of
2054
+ # the procedure (for languages like Pascal, with nested procedures).
2055
+
2056
+ # The function of |ENTER|, with a nesting level of zero, is equivalent to
2057
+
2058
+ # PUSH EBP ; or PUSH BP in 16 bits
2059
+ # MOV EBP,ESP ; or MOV BP,SP in 16 bits
2060
+ # SUB ESP,operand1 ; or SUB SP,operand1 in 16 bits
2061
+
2062
+ # This creates a stack frame with the procedure parameters accessible
2063
+ # upwards from |EBP|, and local variables accessible downwards from |EBP|.
2064
+
2065
+ # With a nesting level of one, the stack frame created is 4 (or 2) bytes
2066
+ # bigger, and the value of the final frame pointer |EBP| is accessible in
2067
+ # memory at |[EBP-4]|.
2068
+
2069
+ # This allows |ENTER|, when called with a nesting level of two, to look at
2070
+ # the stack frame described by the /previous/ value of |EBP|, find the
2071
+ # frame pointer at offset -4 from that, and push it along with its new
2072
+ # frame pointer, so that when a level-two procedure is called from within
2073
+ # a level-one procedure, |[EBP-4]| holds the frame pointer of the most
2074
+ # recent level-one procedure call and |[EBP-8]| holds that of the most
2075
+ # recent level-two call. And so on, for nesting levels up to 31.
2076
+
2077
+ # Stack frames created by |ENTER| can be destroyed by the |LEAVE|
2078
+ # instruction: see section A.94 <#section-A.94>.
2079
+
2080
+
2081
+ # A.28 |F2XM1|: Calculate 2**X-1
2082
+
2083
+ # F2XM1 ; D9 F0 [8086,FPU]
2084
+
2085
+ # |F2XM1| raises 2 to the power of |ST0|, subtracts one, and stores the
2086
+ # result back into |ST0|. The initial contents of |ST0| must be a number
2087
+ # in the range -1 to +1.
2088
+
2089
+
2090
+ # A.29 |FABS|: Floating-Point Absolute Value
2091
+
2092
+ # FABS ; D9 E1 [8086,FPU]
2093
+
2094
+ # |FABS| computes the absolute value of |ST0|, storing the result back in
2095
+ # |ST0|.
2096
+
2097
+
2098
+ # A.30 |FADD|, |FADDP|: Floating-Point Addition
2099
+
2100
+ # FADD mem32 ; D8 /0 [8086,FPU]
2101
+ # FADD mem64 ; DC /0 [8086,FPU]
2102
+
2103
+ # FADD fpureg ; D8 C0+r [8086,FPU]
2104
+ # FADD ST0,fpureg ; D8 C0+r [8086,FPU]
2105
+
2106
+ # FADD TO fpureg ; DC C0+r [8086,FPU]
2107
+ # FADD fpureg,ST0 ; DC C0+r [8086,FPU]
2108
+
2109
+ # FADDP fpureg ; DE C0+r [8086,FPU]
2110
+ # FADDP fpureg,ST0 ; DE C0+r [8086,FPU]
2111
+
2112
+ # |FADD|, given one operand, adds the operand to |ST0| and stores the
2113
+ # result back in |ST0|. If the operand has the |TO| modifier, the result
2114
+ # is stored in the register given rather than in |ST0|.
2115
+
2116
+ # |FADDP| performs the same function as |FADD TO|, but pops the register
2117
+ # stack after storing the result.
2118
+
2119
+ # The given two-operand forms are synonyms for the one-operand forms.
2120
+
2121
+
2122
+ # A.31 |FBLD|, |FBSTP|: BCD Floating-Point Load and Store
2123
+
2124
+ # FBLD mem80 ; DF /4 [8086,FPU]
2125
+ # FBSTP mem80 ; DF /6 [8086,FPU]
2126
+
2127
+ # |FBLD| loads an 80-bit (ten-byte) packed binary-coded decimal number
2128
+ # from the given memory address, converts it to a real, and pushes it on
2129
+ # the register stack. |FBSTP| stores the value of |ST0|, in packed BCD, at
2130
+ # the given address and then pops the register stack.
2131
+
2132
+
2133
+ # A.32 |FCHS|: Floating-Point Change Sign
2134
+
2135
+ # FCHS ; D9 E0 [8086,FPU]
2136
+
2137
+ # |FCHS| negates the number in |ST0|: negative numbers become positive,
2138
+ # and vice versa.
2139
+
2140
+
2141
+ # A.33 |FCLEX|, |FNCLEX|: Clear Floating-Point Exceptions
2142
+
2143
+ # FCLEX ; 9B DB E2 [8086,FPU]
2144
+ # FNCLEX ; DB E2 [8086,FPU]
2145
+
2146
+ # |FCLEX| clears any floating-point exceptions which may be pending.
2147
+ # |FNCLEX| does the same thing but doesn't wait for previous
2148
+ # floating-point operations (including the /handling/ of pending
2149
+ # exceptions) to finish first.
2150
+
2151
+
2152
+ # A.34 |FCMOVcc|: Floating-Point Conditional Move
2153
+
2154
+ # FCMOVB fpureg ; DA C0+r [P6,FPU]
2155
+ # FCMOVB ST0,fpureg ; DA C0+r [P6,FPU]
2156
+
2157
+ # FCMOVBE fpureg ; DA D0+r [P6,FPU]
2158
+ # FCMOVBE ST0,fpureg ; DA D0+r [P6,FPU]
2159
+
2160
+ # FCMOVE fpureg ; DA C8+r [P6,FPU]
2161
+ # FCMOVE ST0,fpureg ; DA C8+r [P6,FPU]
2162
+
2163
+ # FCMOVNB fpureg ; DB C0+r [P6,FPU]
2164
+ # FCMOVNB ST0,fpureg ; DB C0+r [P6,FPU]
2165
+
2166
+ # FCMOVNBE fpureg ; DB D0+r [P6,FPU]
2167
+ # FCMOVNBE ST0,fpureg ; DB D0+r [P6,FPU]
2168
+
2169
+ # FCMOVNE fpureg ; DB C8+r [P6,FPU]
2170
+ # FCMOVNE ST0,fpureg ; DB C8+r [P6,FPU]
2171
+
2172
+ # FCMOVNU fpureg ; DB D8+r [P6,FPU]
2173
+ # FCMOVNU ST0,fpureg ; DB D8+r [P6,FPU]
2174
+
2175
+ # FCMOVU fpureg ; DA D8+r [P6,FPU]
2176
+ # FCMOVU ST0,fpureg ; DA D8+r [P6,FPU]
2177
+
2178
+ # The |FCMOV| instructions perform conditional move operations: each of
2179
+ # them moves the contents of the given register into |ST0| if its
2180
+ # condition is satisfied, and does nothing if not.
2181
+
2182
+ # The conditions are not the same as the standard condition codes used
2183
+ # with conditional jump instructions. The conditions |B|, |BE|, |NB|,
2184
+ # |NBE|, |E| and |NE| are exactly as normal, but none of the other
2185
+ # standard ones are supported. Instead, the condition |U| and its
2186
+ # counterpart |NU| are provided; the |U| condition is satisfied if the
2187
+ # last two floating-point numbers compared were /unordered/, i.e. they
2188
+ # were not equal but neither one could be said to be greater than the
2189
+ # other, for example if they were NaNs. (The flag state which signals this
2190
+ # is the setting of the parity flag: so the |U| condition is notionally
2191
+ # equivalent to |PE|, and |NU| is equivalent to |PO|.)
2192
+
2193
+ # The |FCMOV| conditions test the main processor's status flags, not the
2194
+ # FPU status flags, so using |FCMOV| directly after |FCOM| will not work.
2195
+ # Instead, you should either use |FCOMI| which writes directly to the main
2196
+ # CPU flags word, or use |FSTSW| to extract the FPU flags.
2197
+
2198
+ # Although the |FCMOV| instructions are flagged |P6| above, they may not
2199
+ # be supported by all Pentium Pro processors; the |CPUID| instruction
2200
+ # (section A.22 <#section-A.22>) will return a bit which indicates whether
2201
+ # conditional moves are supported.
2202
+
2203
+
2204
+ # A.35 |FCOM|, |FCOMP|, |FCOMPP|, |FCOMI|, |FCOMIP|: Floating-Point
2205
+ # Compare
2206
+
2207
+ # FCOM mem32 ; D8 /2 [8086,FPU]
2208
+ # FCOM mem64 ; DC /2 [8086,FPU]
2209
+ # FCOM fpureg ; D8 D0+r [8086,FPU]
2210
+ # FCOM ST0,fpureg ; D8 D0+r [8086,FPU]
2211
+
2212
+ # FCOMP mem32 ; D8 /3 [8086,FPU]
2213
+ # FCOMP mem64 ; DC /3 [8086,FPU]
2214
+ # FCOMP fpureg ; D8 D8+r [8086,FPU]
2215
+ # FCOMP ST0,fpureg ; D8 D8+r [8086,FPU]
2216
+
2217
+ # FCOMPP ; DE D9 [8086,FPU]
2218
+
2219
+ # FCOMI fpureg ; DB F0+r [P6,FPU]
2220
+ # FCOMI ST0,fpureg ; DB F0+r [P6,FPU]
2221
+
2222
+ # FCOMIP fpureg ; DF F0+r [P6,FPU]
2223
+ # FCOMIP ST0,fpureg ; DF F0+r [P6,FPU]
2224
+
2225
+ # |FCOM| compares |ST0| with the given operand, and sets the FPU flags
2226
+ # accordingly. |ST0| is treated as the left-hand side of the comparison,
2227
+ # so that the carry flag is set (for a `less-than' result) if |ST0| is
2228
+ # less than the given operand.
2229
+
2230
+ # |FCOMP| does the same as |FCOM|, but pops the register stack afterwards.
2231
+ # |FCOMPP| compares |ST0| with |ST1| and then pops the register stack twice.
2232
+
2233
+ # |FCOMI| and |FCOMIP| work like the corresponding forms of |FCOM| and
2234
+ # |FCOMP|, but write their results directly to the CPU flags register
2235
+ # rather than the FPU status word, so they can be immediately followed by
2236
+ # conditional jump or conditional move instructions.
2237
+
2238
+ # The |FCOM| instructions differ from the |FUCOM| instructions (section
2239
+ # A.69 <#section-A.69>) only in the way they handle quiet NaNs: |FUCOM|
2240
+ # will handle them silently and set the condition code flags to an
2241
+ # `unordered' result, whereas |FCOM| will generate an exception.
2242
+
2243
+
2244
+ # A.36 |FCOS|: Cosine
2245
+
2246
+ # FCOS ; D9 FF [386,FPU]
2247
+
2248
+ # |FCOS| computes the cosine of |ST0| (in radians), and stores the result
2249
+ # in |ST0|. See also |FSINCOS| (section A.61 <#section-A.61>).
2250
+
2251
+
2252
+ # A.37 |FDECSTP|: Decrement Floating-Point Stack Pointer
2253
+
2254
+ # FDECSTP ; D9 F6 [8086,FPU]
2255
+
2256
+ # |FDECSTP| decrements the `top' field in the floating-point status word.
2257
+ # This has the effect of rotating the FPU register stack by one, as if the
2258
+ # contents of |ST7| had been pushed on the stack. See also |FINCSTP|
2259
+ # (section A.46 <#section-A.46>).
2260
+
2261
+
2262
+ # A.38 |FxDISI|, |FxENI|: Disable and Enable Floating-Point Interrupts
2263
+
2264
+ # FDISI ; 9B DB E1 [8086,FPU]
2265
+ # FNDISI ; DB E1 [8086,FPU]
2266
+
2267
+ # FENI ; 9B DB E0 [8086,FPU]
2268
+ # FNENI ; DB E0 [8086,FPU]
2269
+
2270
+ # |FDISI| and |FENI| disable and enable floating-point interrupts. These
2271
+ # instructions are only meaningful on original 8087 processors: the 287
2272
+ # and above treat them as no-operation instructions.
2273
+
2274
+ # |FNDISI| and |FNENI| do the same thing as |FDISI| and |FENI|
2275
+ # respectively, but without waiting for the floating-point processor to
2276
+ # finish what it was doing first.
2277
+
2278
+
2279
+ # A.39 |FDIV|, |FDIVP|, |FDIVR|, |FDIVRP|: Floating-Point Division
2280
+
2281
+ # FDIV mem32 ; D8 /6 [8086,FPU]
2282
+ # FDIV mem64 ; DC /6 [8086,FPU]
2283
+
2284
+ # FDIV fpureg ; D8 F0+r [8086,FPU]
2285
+ # FDIV ST0,fpureg ; D8 F0+r [8086,FPU]
2286
+
2287
+ # FDIV TO fpureg ; DC F8+r [8086,FPU]
2288
+ # FDIV fpureg,ST0 ; DC F8+r [8086,FPU]
2289
+
2290
+ # FDIVR mem32 ; D8 /7 [8086,FPU]
2291
+ # FDIVR mem64 ; DC /7 [8086,FPU]
2292
+
2293
+ # FDIVR fpureg ; D8 F8+r [8086,FPU]
2294
+ # FDIVR ST0,fpureg ; D8 F8+r [8086,FPU]
2295
+
2296
+ # FDIVR TO fpureg ; DC F0+r [8086,FPU]
2297
+ # FDIVR fpureg,ST0 ; DC F0+r [8086,FPU]
2298
+
2299
+ # FDIVP fpureg ; DE F8+r [8086,FPU]
2300
+ # FDIVP fpureg,ST0 ; DE F8+r [8086,FPU]
2301
+
2302
+ # FDIVRP fpureg ; DE F0+r [8086,FPU]
2303
+ # FDIVRP fpureg,ST0 ; DE F0+r [8086,FPU]
2304
+
2305
+ # |FDIV| divides |ST0| by the given operand and stores the result back in
2306
+ # |ST0|, unless the |TO| qualifier is given, in which case it divides the
2307
+ # given operand by |ST0| and stores the result in the operand.
2308
+
2309
+ # |FDIVR| does the same thing, but does the division the other way up: so
2310
+ # if |TO| is not given, it divides the given operand by |ST0| and stores
2311
+ # the result in |ST0|, whereas if |TO| is given it divides |ST0| by its
2312
+ # operand and stores the result in the operand.
2313
+
2314
+ # |FDIVP| operates like |FDIV TO|, but pops the register stack once it has
2315
+ # finished. |FDIVRP| operates like |FDIVR TO|, but pops the register stack
2316
+ # once it has finished.
2317
+
2318
+
2319
+ # A.40 |FFREE|: Flag Floating-Point Register as Unused
2320
+
2321
+ # FFREE fpureg ; DD C0+r [8086,FPU]
2322
+
2323
+ # |FFREE| marks the given register as being empty.
2324
+
2325
+
2326
+ # A.41 |FIADD|: Floating-Point/Integer Addition
2327
+
2328
+ # FIADD mem16 ; DE /0 [8086,FPU]
2329
+ # FIADD mem32 ; DA /0 [8086,FPU]
2330
+
2331
+ # |FIADD| adds the 16-bit or 32-bit integer stored in the given memory
2332
+ # location to |ST0|, storing the result in |ST0|.
2333
+
2334
+
2335
+ # A.42 |FICOM|, |FICOMP|: Floating-Point/Integer Compare
2336
+
2337
+ # FICOM mem16 ; DE /2 [8086,FPU]
2338
+ # FICOM mem32 ; DA /2 [8086,FPU]
2339
+
2340
+ # FICOMP mem16 ; DE /3 [8086,FPU]
2341
+ # FICOMP mem32 ; DA /3 [8086,FPU]
2342
+
2343
+ # |FICOM| compares |ST0| with the 16-bit or 32-bit integer stored in the
2344
+ # given memory location, and sets the FPU flags accordingly. |FICOMP| does
2345
+ # the same, but pops the register stack afterwards.
2346
+
2347
+
2348
+ # A.43 |FIDIV|, |FIDIVR|: Floating-Point/Integer Division
2349
+
2350
+ # FIDIV mem16 ; DE /6 [8086,FPU]
2351
+ # FIDIV mem32 ; DA /6 [8086,FPU]
2352
+
2353
+ # FIDIVR mem16 ; DE /7 [8086,FPU]
2354
+ # FIDIVR mem32 ; DA /7 [8086,FPU]
2355
+
2356
+ # |FIDIV| divides |ST0| by the 16-bit or 32-bit integer stored in the
2357
+ # given memory location, and stores the result in |ST0|. |FIDIVR| does the
2358
+ # division the other way up: it divides the integer by |ST0|, but still
2359
+ # stores the result in |ST0|.
2360
+
2361
+
2362
+ # A.44 |FILD|, |FIST|, |FISTP|: Floating-Point/Integer Conversion
2363
+
2364
+ # FILD mem16 ; DF /0 [8086,FPU]
2365
+ # FILD mem32 ; DB /0 [8086,FPU]
2366
+ # FILD mem64 ; DF /5 [8086,FPU]
2367
+
2368
+ # FIST mem16 ; DF /2 [8086,FPU]
2369
+ # FIST mem32 ; DB /2 [8086,FPU]
2370
+
2371
+ # FISTP mem16 ; DF /3 [8086,FPU]
2372
+ # FISTP mem32 ; DB /3 [8086,FPU]
2373
+ # FISTP mem64 ; DF /7 [8086,FPU]
2374
+
2375
+ # |FILD| loads an integer out of a memory location, converts it to a real,
2376
+ # and pushes it on the FPU register stack. |FIST| converts |ST0| to an
2377
+ # integer and stores that in memory; |FISTP| does the same as |FIST|, but
2378
+ # pops the register stack afterwards.
2379
+
2380
+
2381
+ # A.45 |FIMUL|: Floating-Point/Integer Multiplication
2382
+
2383
+ # FIMUL mem16 ; DE /1 [8086,FPU]
2384
+ # FIMUL mem32 ; DA /1 [8086,FPU]
2385
+
2386
+ # |FIMUL| multiplies |ST0| by the 16-bit or 32-bit integer stored in the
2387
+ # given memory location, and stores the result in |ST0|.
2388
+
2389
+
2390
+ # A.46 |FINCSTP|: Increment Floating-Point Stack Pointer
2391
+
2392
+ # FINCSTP ; D9 F7 [8086,FPU]
2393
+
2394
+ # |FINCSTP| increments the `top' field in the floating-point status word.
2395
+ # This has the effect of rotating the FPU register stack by one, as if the
2396
+ # register stack had been popped; however, unlike the popping of the stack
2397
+ # performed by many FPU instructions, it does not flag the new |ST7|
2398
+ # (previously |ST0|) as empty. See also |FDECSTP| (section A.37
2399
+ # <#section-A.37>).
2400
+
2401
+
2402
+ # A.47 |FINIT|, |FNINIT|: Initialise Floating-Point Unit
2403
+
2404
+ # FINIT ; 9B DB E3 [8086,FPU]
2405
+ # FNINIT ; DB E3 [8086,FPU]
2406
+
2407
+ # |FINIT| initialises the FPU to its default state. It flags all registers
2408
+ # as empty, though it does not actually change their values. |FNINIT| does
2409
+ # the same, without first waiting for pending exceptions to clear.
2410
+
2411
+
2412
+ # A.48 |FISUB|: Floating-Point/Integer Subtraction
2413
+
2414
+ # FISUB mem16 ; DE /4 [8086,FPU]
2415
+ # FISUB mem32 ; DA /4 [8086,FPU]
2416
+
2417
+ # FISUBR mem16 ; DE /5 [8086,FPU]
2418
+ # FISUBR mem32 ; DA /5 [8086,FPU]
2419
+
2420
+ # |FISUB| subtracts the 16-bit or 32-bit integer stored in the given
2421
+ # memory location from |ST0|, and stores the result in |ST0|. |FISUBR|
2422
+ # does the subtraction the other way round, i.e. it subtracts |ST0| from
2423
+ # the given integer, but still stores the result in |ST0|.
2424
+
2425
+
2426
+ # A.49 |FLD|: Floating-Point Load
2427
+
2428
+ # FLD mem32 ; D9 /0 [8086,FPU]
2429
+ # FLD mem64 ; DD /0 [8086,FPU]
2430
+ # FLD mem80 ; DB /5 [8086,FPU]
2431
+ # FLD fpureg ; D9 C0+r [8086,FPU]
2432
+
2433
+ # |FLD| loads a floating-point value out of the given register or memory
2434
+ # location, and pushes it on the FPU register stack.
2435
+
2436
+
2437
+ # A.50 |FLDxx|: Floating-Point Load Constants
2438
+
2439
+ # FLD1 ; D9 E8 [8086,FPU]
2440
+ # FLDL2E ; D9 EA [8086,FPU]
2441
+ # FLDL2T ; D9 E9 [8086,FPU]
2442
+ # FLDLG2 ; D9 EC [8086,FPU]
2443
+ # FLDLN2 ; D9 ED [8086,FPU]
2444
+ # FLDPI ; D9 EB [8086,FPU]
2445
+ # FLDZ ; D9 EE [8086,FPU]
2446
+
2447
+ # These instructions push specific standard constants on the FPU register
2448
+ # stack. |FLD1| pushes the value 1; |FLDL2E| pushes the base-2 logarithm
2449
+ # of e; |FLDL2T| pushes the base-2 log of 10; |FLDLG2| pushes the base-10
2450
+ # log of 2; |FLDLN2| pushes the base-e log of 2; |FLDPI| pushes pi; and
2451
+ # |FLDZ| pushes zero.
2452
+
2453
+
2454
+ # A.51 |FLDCW|: Load Floating-Point Control Word
2455
+
2456
+ # FLDCW mem16 ; D9 /5 [8086,FPU]
2457
+
2458
+ # |FLDCW| loads a 16-bit value out of memory and stores it into the FPU
2459
+ # control word (governing things like the rounding mode, the precision,
2460
+ # and the exception masks). See also |FSTCW| (section A.64 <#section-A.64>).
2461
+
2462
+
2463
+ # A.52 |FLDENV|: Load Floating-Point Environment
2464
+
2465
+ # FLDENV mem ; D9 /4 [8086,FPU]
2466
+
2467
+ # |FLDENV| loads the FPU operating environment (control word, status word,
2468
+ # tag word, instruction pointer, data pointer and last opcode) from
2469
+ # memory. The memory area is 14 or 28 bytes long, depending on the CPU
2470
+ # mode at the time. See also |FSTENV| (section A.65 <#section-A.65>).
2471
+
2472
+
2473
+ # A.53 |FMUL|, |FMULP|: Floating-Point Multiply
2474
+
2475
+ # FMUL mem32 ; D8 /1 [8086,FPU]
2476
+ # FMUL mem64 ; DC /1 [8086,FPU]
2477
+
2478
+ # FMUL fpureg ; D8 C8+r [8086,FPU]
2479
+ # FMUL ST0,fpureg ; D8 C8+r [8086,FPU]
2480
+
2481
+ # FMUL TO fpureg ; DC C8+r [8086,FPU]
2482
+ # FMUL fpureg,ST0 ; DC C8+r [8086,FPU]
2483
+
2484
+ # FMULP fpureg ; DE C8+r [8086,FPU]
2485
+ # FMULP fpureg,ST0 ; DE C8+r [8086,FPU]
2486
+
2487
+ # |FMUL| multiplies |ST0| by the given operand, and stores the result in
2488
+ # |ST0|, unless the |TO| qualifier is used in which case it stores the
2489
+ # result in the operand. |FMULP| performs the same operation as |FMUL TO|,
2490
+ # and then pops the register stack.
2491
+
2492
+
2493
+ # A.54 |FNOP|: Floating-Point No Operation
2494
+
2495
+ # FNOP ; D9 D0 [8086,FPU]
2496
+
2497
+ # |FNOP| does nothing.
2498
+
2499
+
2500
+ # A.55 |FPATAN|, |FPTAN|: Arctangent and Tangent
2501
+
2502
+ # FPATAN ; D9 F3 [8086,FPU]
2503
+ # FPTAN ; D9 F2 [8086,FPU]
2504
+
2505
+ # |FPATAN| computes the arctangent, in radians, of the result of dividing
2506
+ # |ST1| by |ST0|, stores the result in |ST1|, and pops the register stack.
2507
+ # It works like the C |atan2| function, in that changing the sign of both
2508
+ # |ST0| and |ST1| changes the output value by pi (so it performs true
2509
+ # rectangular-to-polar coordinate conversion, with |ST1| being the Y
2510
+ # coordinate and |ST0| being the X coordinate, not merely an arctangent).
2511
+
2512
+ # |FPTAN| computes the tangent of the value in |ST0| (in radians), stores
2513
+ # the result back into |ST0|, and then pushes 1.0 on the register stack.
2514
+
2515
+
2516
+ # A.56 |FPREM|, |FPREM1|: Floating-Point Partial Remainder
2517
+
2518
+ # FPREM ; D9 F8 [8086,FPU]
2519
+ # FPREM1 ; D9 F5 [386,FPU]
2520
+
2521
+ # These instructions both produce the remainder obtained by dividing |ST0|
2522
+ # by |ST1|. This is calculated, notionally, by dividing |ST0| by |ST1|,
2523
+ # rounding the result to an integer, multiplying by |ST1| again, and
2524
+ # computing the value which would need to be added back on to the result
2525
+ # to get back to the original value in |ST0|.
2526
+
2527
+ # The two instructions differ in the way the notional round-to-integer
2528
+ # operation is performed. |FPREM| does it by rounding towards zero, so
2529
+ # that the remainder it returns always has the same sign as the original
2530
+ # value in |ST0|; |FPREM1| does it by rounding to the nearest integer, so
2531
+ # that the remainder always has at most half the magnitude of |ST1|.
2532
+
2533
+ # Both instructions calculate /partial/ remainders, meaning that they may
2534
+ # not manage to provide the final result, but might leave intermediate
2535
+ # results in |ST0| instead. If this happens, they will set the C2 flag in
2536
+ # the FPU status word; therefore, to calculate a remainder, you should
2537
+ # repeatedly execute |FPREM| or |FPREM1| until C2 becomes clear.
2538
+
2539
+
2540
+ # A.57 |FRNDINT|: Floating-Point Round to Integer
2541
+
2542
+ # FRNDINT ; D9 FC [8086,FPU]
2543
+
2544
+ # |FRNDINT| rounds the contents of |ST0| to an integer, according to the
2545
+ # current rounding mode set in the FPU control word, and stores the result
2546
+ # back in |ST0|.
2547
+
2548
+
2549
+ # A.58 |FSAVE|, |FRSTOR|: Save/Restore Floating-Point State
2550
+
2551
+ # FSAVE mem ; 9B DD /6 [8086,FPU]
2552
+ # FNSAVE mem ; DD /6 [8086,FPU]
2553
+
2554
+ # FRSTOR mem ; DD /4 [8086,FPU]
2555
+
2556
+ # |FSAVE| saves the entire floating-point unit state, including all the
2557
+ # information saved by |FSTENV| (section A.65 <#section-A.65>) plus the
2558
+ # contents of all the registers, to a 94 or 108 byte area of memory
2559
+ # (depending on the CPU mode). |FRSTOR| restores the floating-point state
2560
+ # from the same area of memory.
2561
+
2562
+ # |FNSAVE| does the same as |FSAVE|, without first waiting for pending
2563
+ # floating-point exceptions to clear.
2564
+
2565
+
2566
+ # A.59 |FSCALE|: Scale Floating-Point Value by Power of Two
2567
+
2568
+ # FSCALE ; D9 FD [8086,FPU]
2569
+
2570
+ # |FSCALE| scales a number by a power of two: it rounds |ST1| towards zero
2571
+ # to obtain an integer, then multiplies |ST0| by two to the power of that
2572
+ # integer, and stores the result in |ST0|.
2573
+
2574
+
2575
+ # A.60 |FSETPM|: Set Protected Mode
2576
+
2577
+ # FSETPM ; DB E4 [286,FPU]
2578
+
2579
+ # This instruction initialises protected mode on the 287 floating-point
2580
+ # coprocessor. It is only meaningful on that processor: the 387 and above
2581
+ # treat the instruction as a no-operation.
2582
+
2583
+
2584
+ # A.61 |FSIN|, |FSINCOS|: Sine and Cosine
2585
+
2586
+ # FSIN ; D9 FE [386,FPU]
2587
+ # FSINCOS ; D9 FB [386,FPU]
2588
+
2589
+ # |FSIN| calculates the sine of |ST0| (in radians) and stores the result
2590
+ # in |ST0|. |FSINCOS| does the same, but then pushes the cosine of the
2591
+ # same value on the register stack, so that the sine ends up in |ST1| and
2592
+ # the cosine in |ST0|. |FSINCOS| is faster than executing |FSIN| and
2593
+ # |FCOS| (see section A.36 <#section-A.36>) in succession.
2594
+
2595
+
2596
+ # A.62 |FSQRT|: Floating-Point Square Root
2597
+
2598
+ # FSQRT ; D9 FA [8086,FPU]
2599
+
2600
+ # |FSQRT| calculates the square root of |ST0| and stores the result in |ST0|.
2601
+
2602
+
2603
+ # A.63 |FST|, |FSTP|: Floating-Point Store
2604
+
2605
+ # FST mem32 ; D9 /2 [8086,FPU]
2606
+ # FST mem64 ; DD /2 [8086,FPU]
2607
+ # FST fpureg ; DD D0+r [8086,FPU]
2608
+
2609
+ # FSTP mem32 ; D9 /3 [8086,FPU]
2610
+ # FSTP mem64 ; DD /3 [8086,FPU]
2611
+ # FSTP mem80 ; DB /0 [8086,FPU]
2612
+ # FSTP fpureg ; DD D8+r [8086,FPU]
2613
+
2614
+ # |FST| stores the value in |ST0| into the given memory location or other
2615
+ # FPU register. |FSTP| does the same, but then pops the register stack.
2616
+
2617
+
2618
+ # A.64 |FSTCW|: Store Floating-Point Control Word
2619
+
2620
+ # FSTCW mem16 ; 9B D9 /0 [8086,FPU]
2621
+ # FNSTCW mem16 ; D9 /0 [8086,FPU]
2622
+
2623
+ # |FSTCW| stores the FPU control word (governing things like the rounding
2624
+ # mode, the precision, and the exception masks) into a 2-byte memory area.
2625
+ # See also |FLDCW| (section A.51 <#section-A.51>).
2626
+
2627
+ # |FNSTCW| does the same thing as |FSTCW|, without first waiting for
2628
+ # pending floating-point exceptions to clear.
2629
+
2630
+
2631
+ # A.65 |FSTENV|: Store Floating-Point Environment
2632
+
2633
+ # FSTENV mem ; 9B D9 /6 [8086,FPU]
2634
+ # FNSTENV mem ; D9 /6 [8086,FPU]
2635
+
2636
+ # |FSTENV| stores the FPU operating environment (control word, status
2637
+ # word, tag word, instruction pointer, data pointer and last opcode) into
2638
+ # memory. The memory area is 14 or 28 bytes long, depending on the CPU
2639
+ # mode at the time. See also |FLDENV| (section A.52 <#section-A.52>).
2640
+
2641
+ # |FNSTENV| does the same thing as |FSTENV|, without first waiting for
2642
+ # pending floating-point exceptions to clear.
2643
+
2644
+
2645
+ # A.66 |FSTSW|: Store Floating-Point Status Word
2646
+
2647
+ # FSTSW mem16 ; 9B DD /0 [8086,FPU]
2648
+ # FSTSW AX ; 9B DF E0 [286,FPU]
2649
+
2650
+ # FNSTSW mem16 ; DD /0 [8086,FPU]
2651
+ # FNSTSW AX ; DF E0 [286,FPU]
2652
+
2653
+ # |FSTSW| stores the FPU status word into |AX| or into a 2-byte memory area.
2654
+
2655
+ # |FNSTSW| does the same thing as |FSTSW|, without first waiting for
2656
+ # pending floating-point exceptions to clear.
2657
+
2658
+
2659
+ # A.67 |FSUB|, |FSUBP|, |FSUBR|, |FSUBRP|: Floating-Point Subtract
2660
+
2661
+ # FSUB mem32 ; D8 /4 [8086,FPU]
2662
+ # FSUB mem64 ; DC /4 [8086,FPU]
2663
+
2664
+ # FSUB fpureg ; D8 E0+r [8086,FPU]
2665
+ # FSUB ST0,fpureg ; D8 E0+r [8086,FPU]
2666
+
2667
+ # FSUB TO fpureg ; DC E8+r [8086,FPU]
2668
+ # FSUB fpureg,ST0 ; DC E8+r [8086,FPU]
2669
+
2670
+ # FSUBR mem32 ; D8 /5 [8086,FPU]
2671
+ # FSUBR mem64 ; DC /5 [8086,FPU]
2672
+
2673
+ # FSUBR fpureg ; D8 E8+r [8086,FPU]
2674
+ # FSUBR ST0,fpureg ; D8 E8+r [8086,FPU]
2675
+
2676
+ # FSUBR TO fpureg ; DC E0+r [8086,FPU]
2677
+ # FSUBR fpureg,ST0 ; DC E0+r [8086,FPU]
2678
+
2679
+ # FSUBP fpureg ; DE E8+r [8086,FPU]
2680
+ # FSUBP fpureg,ST0 ; DE E8+r [8086,FPU]
2681
+
2682
+ # FSUBRP fpureg ; DE E0+r [8086,FPU]
2683
+ # FSUBRP fpureg,ST0 ; DE E0+r [8086,FPU]
2684
+
2685
+ # |FSUB| subtracts the given operand from |ST0| and stores the result back
2686
+ # in |ST0|, unless the |TO| qualifier is given, in which case it subtracts
2687
+ # |ST0| from the given operand and stores the result in the operand.
2688
+
2689
+ # |FSUBR| does the same thing, but does the subtraction the other way up:
2690
+ # so if |TO| is not given, it subtracts |ST0| from the given operand and
2691
+ # stores the result in |ST0|, whereas if |TO| is given it subtracts its
2692
+ # operand from |ST0| and stores the result in the operand.
2693
+
2694
+ # |FSUBP| operates like |FSUB TO|, but pops the register stack once it has
2695
+ # finished. |FSUBRP| operates like |FSUBR TO|, but pops the register stack
2696
+ # once it has finished.
2697
+
2698
+
2699
+ # A.68 |FTST|: Test |ST0| Against Zero
2700
+
2701
+ # FTST ; D9 E4 [8086,FPU]
2702
+
2703
+ # |FTST| compares |ST0| with zero and sets the FPU flags accordingly.
2704
+ # |ST0| is treated as the left-hand side of the comparison, so that a
2705
+ # `less-than' result is generated if |ST0| is negative.
2706
+
2707
+
2708
+ # A.69 |FUCOMxx|: Floating-Point Unordered Compare
2709
+
2710
+ # FUCOM fpureg ; DD E0+r [386,FPU]
2711
+ # FUCOM ST0,fpureg ; DD E0+r [386,FPU]
2712
+
2713
+ # FUCOMP fpureg ; DD E8+r [386,FPU]
2714
+ # FUCOMP ST0,fpureg ; DD E8+r [386,FPU]
2715
+
2716
+ # FUCOMPP ; DA E9 [386,FPU]
2717
+
2718
+ # FUCOMI fpureg ; DB E8+r [P6,FPU]
2719
+ # FUCOMI ST0,fpureg ; DB E8+r [P6,FPU]
2720
+
2721
+ # FUCOMIP fpureg ; DF E8+r [P6,FPU]
2722
+ # FUCOMIP ST0,fpureg ; DF E8+r [P6,FPU]
2723
+
2724
+ # |FUCOM| compares |ST0| with the given operand, and sets the FPU flags
2725
+ # accordingly. |ST0| is treated as the left-hand side of the comparison,
2726
+ # so that the carry flag is set (for a `less-than' result) if |ST0| is
2727
+ # less than the given operand.
2728
+
2729
+ # |FUCOMP| does the same as |FUCOM|, but pops the register stack
2730
+ # afterwards. |FUCOMPP| compares |ST0| with |ST1| and then pops the
2731
+ # register stack twice.
2732
+
2733
+ # |FUCOMI| and |FUCOMIP| work like the corresponding forms of |FUCOM| and
2734
+ # |FUCOMP|, but write their results directly to the CPU flags register
2735
+ # rather than the FPU status word, so they can be immediately followed by
2736
+ # conditional jump or conditional move instructions.
2737
+
2738
+ # The |FUCOM| instructions differ from the |FCOM| instructions (section
2739
+ # A.35 <#section-A.35>) only in the way they handle quiet NaNs: |FUCOM|
2740
+ # will handle them silently and set the condition code flags to an
2741
+ # `unordered' result, whereas |FCOM| will generate an exception.
2742
+
2743
+
2744
+ # A.70 |FXAM|: Examine Class of Value in |ST0|
2745
+
2746
+ # FXAM ; D9 E5 [8086,FPU]
2747
+
2748
+ # |FXAM| sets the FPU flags C3, C2 and C0 depending on the type of value
2749
+ # stored in |ST0|: 000 (respectively) for an unsupported format, 001 for a
2750
+ # NaN, 010 for a normal finite number, 011 for an infinity, 100 for a
2751
+ # zero, 101 for an empty register, and 110 for a denormal. It also sets
2752
+ # the C1 flag to the sign of the number.
2753
+
2754
+
2755
+ # A.71 |FXCH|: Floating-Point Exchange
2756
+
2757
+ # FXCH ; D9 C9 [8086,FPU]
2758
+ # FXCH fpureg ; D9 C8+r [8086,FPU]
2759
+ # FXCH fpureg,ST0 ; D9 C8+r [8086,FPU]
2760
+ # FXCH ST0,fpureg ; D9 C8+r [8086,FPU]
2761
+
2762
+ # |FXCH| exchanges |ST0| with a given FPU register. The no-operand form
2763
+ # exchanges |ST0| with |ST1|.
2764
+
2765
+
2766
+ # A.72 |FXTRACT|: Extract Exponent and Significand
2767
+
2768
+ # FXTRACT ; D9 F4 [8086,FPU]
2769
+
2770
+ # |FXTRACT| separates the number in |ST0| into its exponent and
2771
+ # significand (mantissa), stores the exponent back into |ST0|, and then
2772
+ # pushes the significand on the register stack (so that the significand
2773
+ # ends up in |ST0|, and the exponent in |ST1|).
2774
+
2775
+
2776
+ # A.73 |FYL2X|, |FYL2XP1|: Compute Y times Log2(X) or Log2(X+1)
2777
+
2778
+ # FYL2X ; D9 F1 [8086,FPU]
2779
+ # FYL2XP1 ; D9 F9 [8086,FPU]
2780
+
2781
+ # |FYL2X| multiplies |ST1| by the base-2 logarithm of |ST0|, stores the
2782
+ # result in |ST1|, and pops the register stack (so that the result ends up
2783
+ # in |ST0|). |ST0| must be non-zero and positive.
2784
+
2785
+ # |FYL2XP1| works the same way, but replacing the base-2 log of |ST0| with
2786
+ # that of |ST0| plus one. This time, |ST0| must have magnitude no greater
2787
+ # than 1 minus half the square root of two.
2788
+
2789
+
2790
+ # A.74 |HLT|: Halt Processor
2791
+
2792
+ # HLT ; F4 [8086]
2793
+
2794
+ # |HLT| puts the processor into a halted state, where it will perform no
2795
+ # more operations until restarted by an interrupt or a reset.
2796
+
2797
+
2798
+ # A.75 |IBTS|: Insert Bit String
2799
+
2800
+ # IBTS r/m16,reg16 ; o16 0F A7 /r [386,UNDOC]
2801
+ # IBTS r/m32,reg32 ; o32 0F A7 /r [386,UNDOC]
2802
+
2803
+ # No clear documentation seems to be available for this instruction: the
2804
+ # best I've been able to find reads `Takes a string of bits from the
2805
+ # second operand and puts them in the first operand'. It is present only
2806
+ # in early 386 processors, and conflicts with the opcodes for
2807
+ # |CMPXCHG486|. NASM supports it only for completeness. Its counterpart is
2808
+ # |XBTS| (see section A.167 <#section-A.167>).
2809
+
2810
+
2811
+ # A.76 |IDIV|: Signed Integer Divide
2812
+
2813
+ # IDIV r/m8 ; F6 /7 [8086]
2814
+ # IDIV r/m16 ; o16 F7 /7 [8086]
2815
+ # IDIV r/m32 ; o32 F7 /7 [386]
2816
+
2817
+ # |IDIV| performs signed integer division. The explicit operand provided
2818
+ # is the divisor; the dividend and destination operands are implicit, in
2819
+ # the following way:
2820
+
2821
+ # * For |IDIV r/m8|, |AX| is divided by the given operand; the
2822
+ # quotient is stored in |AL| and the remainder in |AH|.
2823
+ # * For |IDIV r/m16|, |DX:AX| is divided by the given operand; the
2824
+ # quotient is stored in |AX| and the remainder in |DX|.
2825
+ # * For |IDIV r/m32|, |EDX:EAX| is divided by the given operand; the
2826
+ # quotient is stored in |EAX| and the remainder in |EDX|.
2827
+
2828
+ # Unsigned integer division is performed by the |DIV| instruction: see
2829
+ # section A.25 <#section-A.25>.
2830
+
2831
+
2832
+ # A.77 |IMUL|: Signed Integer Multiply
2833
+
2834
+ # IMUL r/m8 ; F6 /5 [8086]
2835
+ # IMUL r/m16 ; o16 F7 /5 [8086]
2836
+ # IMUL r/m32 ; o32 F7 /5 [386]
2837
+
2838
+ # IMUL reg16,r/m16 ; o16 0F AF /r [386]
2839
+ # IMUL reg32,r/m32 ; o32 0F AF /r [386]
2840
+
2841
+ # IMUL reg16,imm8 ; o16 6B /r ib [286]
2842
+ # IMUL reg16,imm16 ; o16 69 /r iw [286]
2843
+ # IMUL reg32,imm8 ; o32 6B /r ib [386]
2844
+ # IMUL reg32,imm32 ; o32 69 /r id [386]
2845
+
2846
+ # IMUL reg16,r/m16,imm8 ; o16 6B /r ib [286]
2847
+ # IMUL reg16,r/m16,imm16 ; o16 69 /r iw [286]
2848
+ # IMUL reg32,r/m32,imm8 ; o32 6B /r ib [386]
2849
+ # IMUL reg32,r/m32,imm32 ; o32 69 /r id [386]
2850
+
2851
+ # |IMUL| performs signed integer multiplication. For the single-operand
2852
+ # form, the other operand and destination are implicit, in the following way:
2853
+
2854
+ # * For |IMUL r/m8|, |AL| is multiplied by the given operand; the
2855
+ # product is stored in |AX|.
2856
+ # * For |IMUL r/m16|, |AX| is multiplied by the given operand; the
2857
+ # product is stored in |DX:AX|.
2858
+ # * For |IMUL r/m32|, |EAX| is multiplied by the given operand; the
2859
+ # product is stored in |EDX:EAX|.
2860
+
2861
+ # The two-operand form multiplies its two operands and stores the result
2862
+ # in the destination (first) operand. The three-operand form multiplies
2863
+ # its last two operands and stores the result in the first operand.
2864
+
2865
+ # The two-operand form is in fact a shorthand for the three-operand form,
2866
+ # as can be seen by examining the opcode descriptions: in the two-operand
2867
+ # form, the code |/r| takes both its register and |r/m| parts from the
2868
+ # same operand (the first one).
2869
+
2870
+ # In the forms with an 8-bit immediate operand and another longer source
2871
+ # operand, the immediate operand is considered to be signed, and is
2872
+ # sign-extended to the length of the other source operand. In these cases,
2873
+ # the |BYTE| qualifier is necessary to force NASM to generate this form of
2874
+ # the instruction.
2875
+
2876
+ # Unsigned integer multiplication is performed by the |MUL| instruction:
2877
+ # see section A.107 <#section-A.107>.
2878
+
2879
+
2880
+ # A.78 |IN|: Input from I/O Port
2881
+
2882
+ # IN AL,imm8 ; E4 ib [8086]
2883
+ # IN AX,imm8 ; o16 E5 ib [8086]
2884
+ # IN EAX,imm8 ; o32 E5 ib [386]
2885
+ # IN AL,DX ; EC [8086]
2886
+ # IN AX,DX ; o16 ED [8086]
2887
+ # IN EAX,DX ; o32 ED [386]
2888
+
2889
+ # |IN| reads a byte, word or doubleword from the specified I/O port, and
2890
+ # stores it in the given destination register. The port number may be
2891
+ # specified as an immediate value if it is between 0 and 255, and
2892
+ # otherwise must be stored in |DX|. See also |OUT| (section A.111
2893
+ # <#section-A.111>).
2894
+
2895
+
2896
+ # A.79 |INC|: Increment Integer
2897
+
2898
+ # INC reg16 ; o16 40+r [8086]
2899
+ # INC reg32 ; o32 40+r [386]
2900
+ # INC r/m8 ; FE /0 [8086]
2901
+ # INC r/m16 ; o16 FF /0 [8086]
2902
+ # INC r/m32 ; o32 FF /0 [386]
2903
+
2904
+ # |INC| adds 1 to its operand. It does /not/ affect the carry flag: to
2905
+ # affect the carry flag, use |ADD something,1| (see section A.6
2906
+ # <#section-A.6>). See also |DEC| (section A.24 <#section-A.24>).
2907
+
2908
+
2909
+ # A.80 |INSB|, |INSW|, |INSD|: Input String from I/O Port
2910
+
2911
+ # INSB ; 6C [186]
2912
+ # INSW ; o16 6D [186]
2913
+ # INSD ; o32 6D [386]
2914
+
2915
+ # |INSB| inputs a byte from the I/O port specified in |DX| and stores it
2916
+ # at |[ES:DI]| or |[ES:EDI]|. It then increments or decrements (depending
2917
+ # on the direction flag: increments if the flag is clear, decrements if it
2918
+ # is set) |DI| or |EDI|.
2919
+
2920
+ # The register used is |DI| if the address size is 16 bits, and |EDI| if
2921
+ # it is 32 bits. If you need to use an address size not equal to the
2922
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
2923
+
2924
+ # Segment override prefixes have no effect for this instruction: the use
2925
+ # of |ES| for the store to |[DI]| or |[EDI]| cannot be overridden.
2926
+
2927
+ # |INSW| and |INSD| work in the same way, but they input a word or a
2928
+ # doubleword instead of a byte, and increment or decrement the addressing
2929
+ # register by 2 or 4 instead of 1.
2930
+
2931
+ # The |REP| prefix may be used to repeat the instruction |CX| (or |ECX| -
2932
+ # again, the address size chooses which) times.
2933
+
2934
+ # See also |OUTSB|, |OUTSW| and |OUTSD| (section A.112 <#section-A.112>).
2935
+
2936
+
2937
+ # A.81 |INT|: Software Interrupt
2938
+
2939
+ # INT imm8 ; CD ib [8086]
2940
+
2941
+ # |INT| causes a software interrupt through a specified vector number from
2942
+ # 0 to 255.
2943
+
2944
+ # The code generated by the |INT| instruction is always two bytes long:
2945
+ # although there are short forms for some |INT| instructions, NASM does
2946
+ # not generate them when it sees the |INT| mnemonic. In order to generate
2947
+ # single-byte breakpoint instructions, use the |INT3| or |INT1|
2948
+ # instructions (see section A.82 <#section-A.82>) instead.
2949
+
2950
+
2951
+ # A.82 |INT3|, |INT1|, |ICEBP|, |INT01|: Breakpoints
2952
+
2953
+ # INT1 ; F1 [P6]
2954
+ # ICEBP ; F1 [P6]
2955
+ # INT01 ; F1 [P6]
2956
+
2957
+ # INT3 ; CC [8086]
2958
+
2959
+ # |INT1| and |INT3| are short one-byte forms of the instructions |INT 1|
2960
+ # and |INT 3| (see section A.81 <#section-A.81>). They perform a similar
2961
+ # function to their longer counterparts, but take up less code space. They
2962
+ # are used as breakpoints by debuggers.
2963
+
2964
+ # |INT1|, and its alternative synonyms |INT01| and |ICEBP|, is an
2965
+ # instruction used by in-circuit emulators (ICEs). It is present, though
2966
+ # not documented, on some processors down to the 286, but is only
2967
+ # documented for the Pentium Pro. |INT3| is the instruction normally used
2968
+ # as a breakpoint by debuggers.
2969
+
2970
+ # |INT3| is not precisely equivalent to |INT 3|: the short form, since it
2971
+ # is designed to be used as a breakpoint, bypasses the normal IOPL checks
2972
+ # in virtual-8086 mode, and also does not go through interrupt redirection.
2973
+
2974
+
2975
+ # A.83 |INTO|: Interrupt if Overflow
2976
+
2977
+ # INTO ; CE [8086]
2978
+
2979
+ # |INTO| performs an |INT 4| software interrupt (see section A.81
2980
+ # <#section-A.81>) if and only if the overflow flag is set.
2981
+
2982
+
2983
+ # A.84 |INVD|: Invalidate Internal Caches
2984
+
2985
+ # INVD ; 0F 08 [486]
2986
+
2987
+ # |INVD| invalidates and empties the processor's internal caches, and
2988
+ # causes the processor to instruct external caches to do the same. It does
2989
+ # not write the contents of the caches back to memory first: any modified
2990
+ # data held in the caches will be lost. To write the data back first, use
2991
+ # |WBINVD| (section A.164 <#section-A.164>).
2992
+
2993
+
2994
+ # A.85 |INVLPG|: Invalidate TLB Entry
2995
+
2996
+ # INVLPG mem ; 0F 01 /0 [486]
2997
+
2998
+ # |INVLPG| invalidates the translation lookaside buffer (TLB) entry
2999
+ # associated with the supplied memory address.
3000
+
3001
+
3002
+ # A.86 |IRET|, |IRETW|, |IRETD|: Return from Interrupt
3003
+
3004
+ # IRET ; CF [8086]
3005
+ # IRETW ; o16 CF [8086]
3006
+ # IRETD ; o32 CF [386]
3007
+
3008
+ # |IRET| returns from an interrupt (hardware or software) by means of
3009
+ # popping |IP| (or |EIP|), |CS| and the flags off the stack and then
3010
+ # continuing execution from the new |CS:IP|.
3011
+
3012
+ # |IRETW| pops |IP|, |CS| and the flags as 2 bytes each, taking 6 bytes
3013
+ # off the stack in total. |IRETD| pops |EIP| as 4 bytes, pops a further 4
3014
+ # bytes of which the top two are discarded and the bottom two go into
3015
+ # |CS|, and pops the flags as 4 bytes as well, taking 12 bytes off the stack.
3016
+
3017
+ # |IRET| is a shorthand for either |IRETW| or |IRETD|, depending on the
3018
+ # default |BITS| setting at the time.
3019
+
3020
+
3021
+ # A.87 |JCXZ|, |JECXZ|: Jump if CX/ECX Zero
3022
+
3023
+ # JCXZ imm ; o16 E3 rb [8086]
3024
+ # JECXZ imm ; o32 E3 rb [386]
3025
+
3026
+ # |JCXZ| performs a short jump (with maximum range 128 bytes) if and only
3027
+ # if the contents of the |CX| register is 0. |JECXZ| does the same thing,
3028
+ # but with |ECX|.
3029
+
3030
+
3031
+ # A.88 |JMP|: Jump
3032
+
3033
+ # JMP imm ; E9 rw/rd [8086]
3034
+
3035
+ # JMP SHORT imm ; EB rb [8086]
3036
+ # JMP imm:imm16 ; o16 EA iw iw [8086]
3037
+ # JMP imm:imm32 ; o32 EA id iw [386]
3038
+
3039
+ # JMP FAR mem ; o16 FF /5 [8086]
3040
+ # JMP FAR mem ; o32 FF /5 [386]
3041
+ # JMP r/m16 ; o16 FF /4 [8086]
3042
+ # JMP r/m32 ; o32 FF /4 [386]
3043
+
3044
+ # |JMP| jumps to a given address. The address may be specified as an
3045
+ # absolute segment and offset, or as a relative jump within the current
3046
+ # segment.
3047
+
3048
+ # |JMP SHORT imm| has a maximum range of 128 bytes, since the displacement
3049
+ # is specified as only 8 bits, but takes up less code space. NASM does not
3050
+ # choose when to generate |JMP SHORT| for you: you must explicitly code
3051
+ # |SHORT| every time you want a short jump.
3052
+
3053
+ # You can choose between the two immediate far jump forms (|JMP imm:imm|)
3054
+ # by the use of the |WORD| and |DWORD| keywords: |JMP WORD 0x1234:0x5678|
3055
+ # or |JMP DWORD 0x1234:0x56789abc|.
3056
+
3057
+ # The |JMP FAR mem| forms execute a far jump by loading the destination
3058
+ # address out of memory. The address loaded consists of 16 or 32 bits of
3059
+ # offset (depending on the operand size), and 16 bits of segment. The
3060
+ # operand size may be overridden using |JMP WORD FAR mem| or |JMP DWORD
3061
+ # FAR mem|.
3062
+
3063
+ # The |JMP r/m| forms execute a near jump (within the same segment),
3064
+ # loading the destination address out of memory or out of a register. The
3065
+ # keyword |NEAR| may be specified, for clarity, in these forms, but is not
3066
+ # necessary. Again, operand size can be overridden using |JMP WORD mem| or
3067
+ # |JMP DWORD mem|.
3068
+
3069
+ # As a convenience, NASM does not require you to jump to a far symbol by
3070
+ # coding the cumbersome |JMP SEG routine:routine|, but instead allows the
3071
+ # easier synonym |JMP FAR routine|.
3072
+
3073
+ # The |JMP r/m| forms given above are near jumps; NASM will accept the
3073
+ # |NEAR| keyword (e.g. |JMP NEAR [address]|), even though it is not
3075
+ # strictly necessary.
3076
+
3077
+
3078
+ # A.89 |Jcc|: Conditional Branch
3079
+
3080
+ # Jcc imm ; 70+cc rb [8086]
3081
+ # Jcc NEAR imm ; 0F 80+cc rw/rd [386]
3082
+
3083
+ # The conditional jump instructions execute a near (same segment) jump if
3084
+ # and only if their conditions are satisfied. For example, |JNZ| jumps
3085
+ # only if the zero flag is not set.
3086
+
3087
+ # The ordinary form of the instructions has only a 128-byte range; the
3088
+ # |NEAR| form is a 386 extension to the instruction set, and can span the
3089
+ # full size of a segment. NASM will not override your choice of jump
3090
+ # instruction: if you want |Jcc NEAR|, you have to use the |NEAR| keyword.
3091
+
3092
+ # The |SHORT| keyword is allowed on the first form of the instruction, for
3093
+ # clarity, but is not necessary.
3094
+
3095
+
3096
+ # A.90 |LAHF|: Load AH from Flags
3097
+
3098
+ # LAHF ; 9F [8086]
3099
+
3100
+ # |LAHF| sets the |AH| register according to the contents of the low byte
3101
+ # of the flags word. See also |SAHF| (section A.145 <#section-A.145>).
3102
+
3103
+
3104
+ # A.91 |LAR|: Load Access Rights
3105
+
3106
+ # LAR reg16,r/m16 ; o16 0F 02 /r [286,PRIV]
3107
+ # LAR reg32,r/m32 ; o32 0F 02 /r [286,PRIV]
3108
+
3109
+ # |LAR| takes the segment selector specified by its source (second)
3110
+ # operand, finds the corresponding segment descriptor in the GDT or LDT,
3111
+ # and loads the access-rights byte of the descriptor into its destination
3112
+ # (first) operand.
3113
+
3114
+
3115
+ # A.92 |LDS|, |LES|, |LFS|, |LGS|, |LSS|: Load Far Pointer
3116
+
3117
+ # LDS reg16,mem ; o16 C5 /r [8086]
3118
+ # LDS reg32,mem ; o32 C5 /r [8086]
3119
+
3120
+ # LES reg16,mem ; o16 C4 /r [8086]
3121
+ # LES reg32,mem ; o32 C4 /r [8086]
3122
+
3123
+ # LFS reg16,mem ; o16 0F B4 /r [386]
3124
+ # LFS reg32,mem ; o32 0F B4 /r [386]
3125
+
3126
+ # LGS reg16,mem ; o16 0F B5 /r [386]
3127
+ # LGS reg32,mem ; o32 0F B5 /r [386]
3128
+
3129
+ # LSS reg16,mem ; o16 0F B2 /r [386]
3130
+ # LSS reg32,mem ; o32 0F B2 /r [386]
3131
+
3132
+ # These instructions load an entire far pointer (16 or 32 bits of offset,
3133
+ # plus 16 bits of segment) out of memory in one go. |LDS|, for example,
3134
+ # loads 16 or 32 bits from the given memory address into the given
3135
+ # register (depending on the size of the register), then loads the /next/
3136
+ # 16 bits from memory into |DS|. |LES|, |LFS|, |LGS| and |LSS| work in the
3137
+ # same way but use the other segment registers.
3138
+
3139
+
3140
+ # A.93 |LEA|: Load Effective Address
3141
+
3142
+ # LEA reg16,mem ; o16 8D /r [8086]
3143
+ # LEA reg32,mem ; o32 8D /r [8086]
3144
+
3145
+ # |LEA|, despite its syntax, does not access memory. It calculates the
3146
+ # effective address specified by its second operand as if it were going to
3147
+ # load or store data from it, but instead it stores the calculated address
3148
+ # into the register specified by its first operand. This can be used to
3149
+ # perform quite complex calculations (e.g. |LEA EAX,[EBX+ECX*4+100]|) in
3150
+ # one instruction.
3151
+
3152
+ # |LEA|, despite being a purely arithmetic instruction which accesses no
3153
+ # memory, still requires square brackets around its second operand, as if
3154
+ # it were a memory reference.
3155
+
3156
+
3157
+ # A.94 |LEAVE|: Destroy Stack Frame
3158
+
3159
+ # LEAVE ; C9 [186]
3160
+
3161
+ # |LEAVE| destroys a stack frame of the form created by the |ENTER|
3162
+ # instruction (see section A.27 <#section-A.27>). It is functionally
3163
+ # equivalent to |MOV ESP,EBP| followed by |POP EBP| (or |MOV SP,BP|
3164
+ # followed by |POP BP| in 16-bit mode).
3165
+
3166
+
3167
+ # A.95 |LGDT|, |LIDT|, |LLDT|: Load Descriptor Tables
3168
+
3169
+ # LGDT mem ; 0F 01 /2 [286,PRIV]
3170
+ # LIDT mem ; 0F 01 /3 [286,PRIV]
3171
+ # LLDT r/m16 ; 0F 00 /2 [286,PRIV]
3172
+
3173
+ # |LGDT| and |LIDT| both take a 6-byte memory area as an operand: they
3174
+ # load a 32-bit linear address and a 16-bit size limit from that area (in
3175
+ # the opposite order) into the GDTR (global descriptor table register) or
3176
+ # IDTR (interrupt descriptor table register). These are the only
3177
+ # instructions which directly use /linear/ addresses, rather than
3178
+ # segment/offset pairs.
3179
+
3180
+ # |LLDT| takes a segment selector as an operand. The processor looks up
3181
+ # that selector in the GDT and stores the limit and base address given
3182
+ # there into the LDTR (local descriptor table register).
3183
+
3184
+ # See also |SGDT|, |SIDT| and |SLDT| (section A.151 <#section-A.151>).
3185
+
3186
+
3187
+ # A.96 |LMSW|: Load Machine Status Word
3188
+
3189
+ # LMSW r/m16 ; 0F 01 /6 [286,PRIV]
3190
+
3191
+ # |LMSW| loads the bottom four bits of the source operand into the bottom
3192
+ # four bits of the |CR0| control register (or the Machine Status Word, on
3193
+ # 286 processors). See also |SMSW| (section A.155 <#section-A.155>).
3194
+
3195
+
3196
+ # A.97 |LOADALL|, |LOADALL286|: Load Processor State
3197
+
3198
+ # LOADALL ; 0F 07 [386,UNDOC]
3199
+ # LOADALL286 ; 0F 05 [286,UNDOC]
3200
+
3201
+ # This instruction, in its two different-opcode forms, is apparently
3202
+ # supported on most 286 processors, some 386 and possibly some 486. The
3203
+ # opcode differs between the 286 and the 386.
3204
+
3205
+ # The function of the instruction is to load all information relating to
3206
+ # the state of the processor out of a block of memory: on the 286, this
3207
+ # block is located implicitly at absolute address |0x800|, and on the 386
3208
+ # and 486 it is at |[ES:EDI]|.
3209
+
3210
+
3211
+ # A.98 |LODSB|, |LODSW|, |LODSD|: Load from String
3212
+
3213
+ # LODSB ; AC [8086]
3214
+ # LODSW ; o16 AD [8086]
3215
+ # LODSD ; o32 AD [386]
3216
+
3217
+ # |LODSB| loads a byte from |[DS:SI]| or |[DS:ESI]| into |AL|. It then
3218
+ # increments or decrements (depending on the direction flag: increments if
3219
+ # the flag is clear, decrements if it is set) |SI| or |ESI|.
3220
+
3221
+ # The register used is |SI| if the address size is 16 bits, and |ESI| if
3222
+ # it is 32 bits. If you need to use an address size not equal to the
3223
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
3224
+
3225
+ # The segment register used to load from |[SI]| or |[ESI]| can be
3226
+ # overridden by using a segment register name as a prefix (for example,
3227
+ # |es lodsb|).
3228
+
3229
+ # |LODSW| and |LODSD| work in the same way, but they load a word or a
3230
+ # doubleword instead of a byte, and increment or decrement the addressing
3231
+ # registers by 2 or 4 instead of 1.
3232
+
3233
+
3234
+ # A.99 |LOOP|, |LOOPE|, |LOOPZ|, |LOOPNE|, |LOOPNZ|: Loop with Counter
3235
+
3236
+ # LOOP imm ; E2 rb [8086]
3237
+ # LOOP imm,CX ; a16 E2 rb [8086]
3238
+ # LOOP imm,ECX ; a32 E2 rb [386]
3239
+
3240
+ # LOOPE imm ; E1 rb [8086]
3241
+ # LOOPE imm,CX ; a16 E1 rb [8086]
3242
+ # LOOPE imm,ECX ; a32 E1 rb [386]
3243
+ # LOOPZ imm ; E1 rb [8086]
3244
+ # LOOPZ imm,CX ; a16 E1 rb [8086]
3245
+ # LOOPZ imm,ECX ; a32 E1 rb [386]
3246
+
3247
+ # LOOPNE imm ; E0 rb [8086]
3248
+ # LOOPNE imm,CX ; a16 E0 rb [8086]
3249
+ # LOOPNE imm,ECX ; a32 E0 rb [386]
3250
+ # LOOPNZ imm ; E0 rb [8086]
3251
+ # LOOPNZ imm,CX ; a16 E0 rb [8086]
3252
+ # LOOPNZ imm,ECX ; a32 E0 rb [386]
3253
+
3254
+ # |LOOP| decrements its counter register (either |CX| or |ECX| - if one is
3255
+ # not specified explicitly, the |BITS| setting dictates which is used) by
3256
+ # one, and if the counter does not become zero as a result of this
3257
+ # operation, it jumps to the given label. The jump has a range of 128 bytes.
3258
+
3259
+ # |LOOPE| (or its synonym |LOOPZ|) adds the additional condition that it
3260
+ # only jumps if the counter is nonzero /and/ the zero flag is set.
3261
+ # Similarly, |LOOPNE| (and |LOOPNZ|) jumps only if the counter is nonzero
3262
+ # and the zero flag is clear.
3263
+
3264
+
3265
+ # A.100 |LSL|: Load Segment Limit
3266
+
3267
+ # LSL reg16,r/m16 ; o16 0F 03 /r [286,PRIV]
3268
+ # LSL reg32,r/m32 ; o32 0F 03 /r [286,PRIV]
3269
+
3270
+ # |LSL| is given a segment selector in its source (second) operand; it
3271
+ # computes the segment limit value by loading the segment limit field from
3272
+ # the associated segment descriptor in the GDT or LDT. (This involves
3273
+ # shifting left by 12 bits if the segment limit is page-granular, and not
3274
+ # if it is byte-granular; so you end up with a byte limit in either case.)
3275
+ # The segment limit obtained is then loaded into the destination (first)
3276
+ # operand.
3277
+
3278
+
3279
+ # A.101 |LTR|: Load Task Register
3280
+
3281
+ # LTR r/m16 ; 0F 00 /3 [286,PRIV]
3282
+
3283
+ # |LTR| looks up the segment base and limit in the GDT or LDT descriptor
3284
+ # specified by the segment selector given as its operand, and loads them
3285
+ # into the Task Register.
3286
+
3287
+
3288
+ # A.102 |MOV|: Move Data
3289
+
3290
+ # MOV r/m8,reg8 ; 88 /r [8086]
3291
+ # MOV r/m16,reg16 ; o16 89 /r [8086]
3292
+ # MOV r/m32,reg32 ; o32 89 /r [386]
3293
+ # MOV reg8,r/m8 ; 8A /r [8086]
3294
+ # MOV reg16,r/m16 ; o16 8B /r [8086]
3295
+ # MOV reg32,r/m32 ; o32 8B /r [386]
3296
+
3297
+ # MOV reg8,imm8 ; B0+r ib [8086]
3298
+ # MOV reg16,imm16 ; o16 B8+r iw [8086]
3299
+ # MOV reg32,imm32 ; o32 B8+r id [386]
3300
+ # MOV r/m8,imm8 ; C6 /0 ib [8086]
3301
+ # MOV r/m16,imm16 ; o16 C7 /0 iw [8086]
3302
+ # MOV r/m32,imm32 ; o32 C7 /0 id [386]
3303
+
3304
+ # MOV AL,memoffs8 ; A0 ow/od [8086]
3305
+ # MOV AX,memoffs16 ; o16 A1 ow/od [8086]
3306
+ # MOV EAX,memoffs32 ; o32 A1 ow/od [386]
3307
+ # MOV memoffs8,AL ; A2 ow/od [8086]
3308
+ # MOV memoffs16,AX ; o16 A3 ow/od [8086]
3309
+ # MOV memoffs32,EAX ; o32 A3 ow/od [386]
3310
+
3311
+ # MOV r/m16,segreg ; o16 8C /r [8086]
3312
+ # MOV r/m32,segreg ; o32 8C /r [386]
3313
+ # MOV segreg,r/m16 ; o16 8E /r [8086]
3314
+ # MOV segreg,r/m32 ; o32 8E /r [386]
3315
+
3316
+ # MOV reg32,CR0/2/3/4 ; 0F 20 /r [386]
3317
+ # MOV reg32,DR0/1/2/3/6/7 ; 0F 21 /r [386]
3318
+ # MOV reg32,TR3/4/5/6/7 ; 0F 24 /r [386]
3319
+ # MOV CR0/2/3/4,reg32 ; 0F 22 /r [386]
3320
+ # MOV DR0/1/2/3/6/7,reg32 ; 0F 23 /r [386]
3321
+ # MOV TR3/4/5/6/7,reg32 ; 0F 26 /r [386]
3322
+
3323
+ # |MOV| copies the contents of its source (second) operand into its
3324
+ # destination (first) operand.
3325
+
3326
+ # In all forms of the |MOV| instruction, the two operands are the same
3327
+ # size, except for moving between a segment register and an |r/m32|
3328
+ # operand. These instructions are treated exactly like the corresponding
3329
+ # 16-bit equivalent (so that, for example, |MOV DS,EAX| functions
3330
+ # identically to |MOV DS,AX| but saves a prefix when in 32-bit mode),
3331
+ # except that when a segment register is moved into a 32-bit destination,
3332
+ # the top two bytes of the result are undefined.
3333
+
3334
+ # |MOV| may not use |CS| as a destination.
3335
+
3336
+ # |CR4| is only a supported register on the Pentium and above.
3337
+
3338
+
3339
+ # A.103 |MOVD|: Move Doubleword to/from MMX Register
3340
+
3341
+ # MOVD mmxreg,r/m32 ; 0F 6E /r [PENT,MMX]
3342
+ # MOVD r/m32,mmxreg ; 0F 7E /r [PENT,MMX]
3343
+
3344
+ # |MOVD| copies 32 bits from its source (second) operand into its
3345
+ # destination (first) operand. When the destination is a 64-bit MMX
3346
+ # register, the top 32 bits are set to zero.
3347
+
3348
+
3349
+ # A.104 |MOVQ|: Move Quadword to/from MMX Register
3350
+
3351
+ # MOVQ mmxreg,r/m64 ; 0F 6F /r [PENT,MMX]
3352
+ # MOVQ r/m64,mmxreg ; 0F 7F /r [PENT,MMX]
3353
+
3354
+ # |MOVQ| copies 64 bits from its source (second) operand into its
3355
+ # destination (first) operand.
3356
+
3357
+
3358
+ # A.105 |MOVSB|, |MOVSW|, |MOVSD|: Move String
3359
+
3360
+ # MOVSB ; A4 [8086]
3361
+ # MOVSW ; o16 A5 [8086]
3362
+ # MOVSD ; o32 A5 [386]
3363
+
3364
+ # |MOVSB| copies the byte at |[DS:SI]| or |[DS:ESI]| to |[ES:DI]| or
3365
+ # |[ES:EDI]|. It then increments or decrements (depending on the direction
3366
+ # flag: increments if the flag is clear, decrements if it is set) |SI| and
3367
+ # |DI| (or |ESI| and |EDI|).
3368
+
3369
+ # The registers used are |SI| and |DI| if the address size is 16 bits, and
3370
+ # |ESI| and |EDI| if it is 32 bits. If you need to use an address size not
3371
+ # equal to the current |BITS| setting, you can use an explicit |a16| or
3372
+ # |a32| prefix.
3373
+
3374
+ # The segment register used to load from |[SI]| or |[ESI]| can be
3375
+ # overridden by using a segment register name as a prefix (for example,
3376
+ # |es movsb|). The use of |ES| for the store to |[DI]| or |[EDI]| cannot
3377
+ # be overridden.
3378
+
3379
+ # |MOVSW| and |MOVSD| work in the same way, but they copy a word or a
3380
+ # doubleword instead of a byte, and increment or decrement the addressing
3381
+ # registers by 2 or 4 instead of 1.
3382
+
3383
+ # The |REP| prefix may be used to repeat the instruction |CX| (or |ECX| -
3384
+ # again, the address size chooses which) times.
3385
+
3386
+
3387
+ # A.106 |MOVSX|, |MOVZX|: Move Data with Sign or Zero Extend
3388
+
3389
+ # MOVSX reg16,r/m8 ; o16 0F BE /r [386]
3390
+ # MOVSX reg32,r/m8 ; o32 0F BE /r [386]
3391
+ # MOVSX reg32,r/m16 ; o32 0F BF /r [386]
3392
+
3393
+ # MOVZX reg16,r/m8 ; o16 0F B6 /r [386]
3394
+ # MOVZX reg32,r/m8 ; o32 0F B6 /r [386]
3395
+ # MOVZX reg32,r/m16 ; o32 0F B7 /r [386]
3396
+
3397
+ # |MOVSX| sign-extends its source (second) operand to the length of its
3398
+ # destination (first) operand, and copies the result into the destination
3399
+ # operand. |MOVZX| does the same, but zero-extends rather than
3400
+ # sign-extending.
3401
+
3402
+
3403
+ # A.107 |MUL|: Unsigned Integer Multiply
3404
+
3405
+ # MUL r/m8 ; F6 /4 [8086]
3406
+ # MUL r/m16 ; o16 F7 /4 [8086]
3407
+ # MUL r/m32 ; o32 F7 /4 [386]
3408
+
3409
+ # |MUL| performs unsigned integer multiplication. The other operand to the
3410
+ # multiplication, and the destination operand, are implicit, in the
3411
+ # following way:
3412
+
3413
+ # * For |MUL r/m8|, |AL| is multiplied by the given operand; the
3414
+ # product is stored in |AX|.
3415
+ # * For |MUL r/m16|, |AX| is multiplied by the given operand; the
3416
+ # product is stored in |DX:AX|.
3417
+ # * For |MUL r/m32|, |EAX| is multiplied by the given operand; the
3418
+ # product is stored in |EDX:EAX|.
3419
+
3420
+ # Signed integer multiplication is performed by the |IMUL| instruction:
3421
+ # see section A.77 <#section-A.77>.
3422
+
3423
+
3424
+ # A.108 |NEG|, |NOT|: Two's and One's Complement
3425
+
3426
+ # NEG r/m8 ; F6 /3 [8086]
3427
+ # NEG r/m16 ; o16 F7 /3 [8086]
3428
+ # NEG r/m32 ; o32 F7 /3 [386]
3429
+
3430
+ # NOT r/m8 ; F6 /2 [8086]
3431
+ # NOT r/m16 ; o16 F7 /2 [8086]
3432
+ # NOT r/m32 ; o32 F7 /2 [386]
3433
+
3434
+ # |NEG| replaces the contents of its operand by the two's complement
3435
+ # negation (invert all the bits and then add one) of the original value.
3436
+ # |NOT|, similarly, performs one's complement (inverts all the bits).
3437
+
3438
+
3439
+ # A.109 |NOP|: No Operation
3440
+
3441
+ # NOP ; 90 [8086]
3442
+
3443
+ # |NOP| performs no operation. Its opcode is the same as that generated by
3444
+ # |XCHG AX,AX| or |XCHG EAX,EAX| (depending on the processor mode; see
3445
+ # section A.168 <#section-A.168>).
3446
+
3447
+
3448
+ # A.110 |OR|: Bitwise OR
3449
+
3450
+ # OR r/m8,reg8 ; 08 /r [8086]
3451
+ # OR r/m16,reg16 ; o16 09 /r [8086]
3452
+ # OR r/m32,reg32 ; o32 09 /r [386]
3453
+
3454
+ # OR reg8,r/m8 ; 0A /r [8086]
3455
+ # OR reg16,r/m16 ; o16 0B /r [8086]
3456
+ # OR reg32,r/m32 ; o32 0B /r [386]
3457
+
3458
+ # OR r/m8,imm8 ; 80 /1 ib [8086]
3459
+ # OR r/m16,imm16 ; o16 81 /1 iw [8086]
3460
+ # OR r/m32,imm32 ; o32 81 /1 id [386]
3461
+
3462
+ # OR r/m16,imm8 ; o16 83 /1 ib [8086]
3463
+ # OR r/m32,imm8 ; o32 83 /1 ib [386]
3464
+
3465
+ # OR AL,imm8 ; 0C ib [8086]
3466
+ # OR AX,imm16 ; o16 0D iw [8086]
3467
+ # OR EAX,imm32 ; o32 0D id [386]
3468
+
3469
+ # |OR| performs a bitwise OR operation between its two operands (i.e. each
3470
+ # bit of the result is 1 if and only if at least one of the corresponding
3471
+ # bits of the two inputs was 1), and stores the result in the destination
3472
+ # (first) operand.
3473
+
3474
+ # In the forms with an 8-bit immediate second operand and a longer first
3475
+ # operand, the second operand is considered to be signed, and is
3476
+ # sign-extended to the length of the first operand. In these cases, the
3477
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
3478
+ # instruction.
3479
+
3480
+ # The MMX instruction |POR| (see section A.129 <#section-A.129>) performs
3481
+ # the same operation on the 64-bit MMX registers.
3482
+
3483
+
3484
+ # A.111 |OUT|: Output Data to I/O Port
3485
+
3486
+ # OUT imm8,AL ; E6 ib [8086]
3487
+ # OUT imm8,AX ; o16 E7 ib [8086]
3488
+ # OUT imm8,EAX ; o32 E7 ib [386]
3489
+ # OUT DX,AL ; EE [8086]
3490
+ # OUT DX,AX ; o16 EF [8086]
3491
+ # OUT DX,EAX ; o32 EF [386]
3492
+
3493
+ # |OUT| writes the contents of the given source register to the specified
3494
+ # I/O port. The port number may be specified as an immediate value if it
3495
+ # is between 0 and 255, and otherwise must be stored in |DX|. See also
3496
+ # |IN| (section A.78 <#section-A.78>).
3497
+
3498
+
3499
+ # A.112 |OUTSB|, |OUTSW|, |OUTSD|: Output String to I/O Port
3500
+
3501
+ # OUTSB ; 6E [186]
3502
+
3503
+ # OUTSW ; o16 6F [186]
3504
+
3505
+ # OUTSD ; o32 6F [386]
3506
+
3507
+ # |OUTSB| loads a byte from |[DS:SI]| or |[DS:ESI]| and writes it to the
3508
+ # I/O port specified in |DX|. It then increments or decrements (depending
3509
+ # on the direction flag: increments if the flag is clear, decrements if it
3510
+ # is set) |SI| or |ESI|.
3511
+
3512
+ # The register used is |SI| if the address size is 16 bits, and |ESI| if
3513
+ # it is 32 bits. If you need to use an address size not equal to the
3514
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
3515
+
3516
+ # The segment register used to load from |[SI]| or |[ESI]| can be
3517
+ # overridden by using a segment register name as a prefix (for example,
3518
+ # |es outsb|).
3519
+
3520
+ # |OUTSW| and |OUTSD| work in the same way, but they output a word or a
3521
+ # doubleword instead of a byte, and increment or decrement the addressing
3522
+ # registers by 2 or 4 instead of 1.
3523
+
3524
+ # The |REP| prefix may be used to repeat the instruction |CX| (or |ECX| -
3525
+ # again, the address size chooses which) times.
3526
+
3527
+
3528
+ # A.113 |PACKSSDW|, |PACKSSWB|, |PACKUSWB|: Pack Data
3529
+
3530
+ # PACKSSDW mmxreg,r/m64 ; 0F 6B /r [PENT,MMX]
3531
+ # PACKSSWB mmxreg,r/m64 ; 0F 63 /r [PENT,MMX]
3532
+ # PACKUSWB mmxreg,r/m64 ; 0F 67 /r [PENT,MMX]
3533
+
3534
+ # All these instructions start by forming a notional 128-bit word by
3535
+ # placing the source (second) operand on the left of the destination
3536
+ # (first) operand. |PACKSSDW| then splits this 128-bit word into four
3537
+ # doublewords, converts each to a word, and loads them side by side into
3538
+ # the destination register; |PACKSSWB| and |PACKUSWB| both split the
3539
+ # 128-bit word into eight words, convert each to a byte, and load
3540
+ # /those/ side by side into the destination register.
3541
+
3542
+ # |PACKSSDW| and |PACKSSWB| perform signed saturation when reducing the
3543
+ # length of numbers: if the number is too large to fit into the reduced
3544
+ # space, they replace it by the largest signed number (|7FFFh| or |7Fh|)
3545
+ # that /will/ fit, and if it is too small then they replace it by the
3546
+ # smallest signed number (|8000h| or |80h|) that will fit. |PACKUSWB|
3547
+ # performs unsigned saturation: it treats its input as signed, and
3548
+ # clamps it to the smallest or largest unsigned number that will fit.
3549
+
3550
+
3551
+ # A.114 |PADDxx|: MMX Packed Addition
3552
+
3553
+ # PADDB mmxreg,r/m64 ; 0F FC /r [PENT,MMX]
3554
+ # PADDW mmxreg,r/m64 ; 0F FD /r [PENT,MMX]
3555
+ # PADDD mmxreg,r/m64 ; 0F FE /r [PENT,MMX]
3556
+
3557
+ # PADDSB mmxreg,r/m64 ; 0F EC /r [PENT,MMX]
3558
+ # PADDSW mmxreg,r/m64 ; 0F ED /r [PENT,MMX]
3559
+
3560
+ # PADDUSB mmxreg,r/m64 ; 0F DC /r [PENT,MMX]
3561
+ # PADDUSW mmxreg,r/m64 ; 0F DD /r [PENT,MMX]
3562
+
3563
+ # |PADDxx| all perform packed addition between their two 64-bit operands,
3564
+ # storing the result in the destination (first) operand. The |PADDxB|
3565
+ # forms treat the 64-bit operands as vectors of eight bytes, and add each
3566
+ # byte individually; |PADDxW| treat the operands as vectors of four words;
3567
+ # and |PADDD| treats its operands as vectors of two doublewords.
3568
+
3569
+ # |PADDSB| and |PADDSW| perform signed saturation on the sum of each pair
3570
+ # of bytes or words: if the result of an addition is too large or too
3571
+ # small to fit into a signed byte or word result, it is clipped
3572
+ # (saturated) to the largest or smallest value which /will/ fit. |PADDUSB|
3573
+ # and |PADDUSW| similarly perform unsigned saturation, clipping to |0FFh|
3574
+ # or |0FFFFh| if the result is larger than that.
3575
+
3576
+
3577
+ # A.115 |PADDSIW|: MMX Packed Addition to Implicit Destination
3578
+
3579
+ # PADDSIW mmxreg,r/m64 ; 0F 51 /r [CYRIX,MMX]
3580
+
3581
+ # |PADDSIW|, specific to the Cyrix extensions to the MMX instruction set,
3582
+ # performs the same function as |PADDSW|, except that the result is not
3583
+ # placed in the register specified by the first operand, but instead in
3584
+ # the register whose number differs from the first operand only in the
3585
+ # last bit. So |PADDSIW MM0,MM2| would put the result in |MM1|, but
3586
+ # |PADDSIW MM1,MM2| would put the result in |MM0|.
3587
+
3588
+
3589
+ # A.116 |PAND|, |PANDN|: MMX Bitwise AND and AND-NOT
3590
+
3591
+ # PAND mmxreg,r/m64 ; 0F DB /r [PENT,MMX]
3592
+ # PANDN mmxreg,r/m64 ; 0F DF /r [PENT,MMX]
3593
+
3594
+ # |PAND| performs a bitwise AND operation between its two operands (i.e.
3595
+ # each bit of the result is 1 if and only if the corresponding bits of the
3596
+ # two inputs were both 1), and stores the result in the destination
3597
+ # (first) operand.
3598
+
3599
+ # |PANDN| performs the same operation, but performs a one's complement
3600
+ # operation on the destination (first) operand first.
3601
+
3602
+
3603
+ # A.117 |PAVEB|: MMX Packed Average
3604
+
3605
+ # PAVEB mmxreg,r/m64 ; 0F 50 /r [CYRIX,MMX]
3606
+
3607
+ # |PAVEB|, specific to the Cyrix MMX extensions, treats its two operands
3608
+ # as vectors of eight unsigned bytes, and calculates the average of the
3609
+ # corresponding bytes in the operands. The resulting vector of eight
3610
+ # averages is stored in the first operand.
3611
+
3612
+
3613
+ # A.118 |PCMPxx|: MMX Packed Comparison
3614
+
3615
+ # PCMPEQB mmxreg,r/m64 ; 0F 74 /r [PENT,MMX]
3616
+ # PCMPEQW mmxreg,r/m64 ; 0F 75 /r [PENT,MMX]
3617
+ # PCMPEQD mmxreg,r/m64 ; 0F 76 /r [PENT,MMX]
3618
+
3619
+ # PCMPGTB mmxreg,r/m64 ; 0F 64 /r [PENT,MMX]
3620
+ # PCMPGTW mmxreg,r/m64 ; 0F 65 /r [PENT,MMX]
3621
+ # PCMPGTD mmxreg,r/m64 ; 0F 66 /r [PENT,MMX]
3622
+
3623
+ # The |PCMPxx| instructions all treat their operands as vectors of bytes,
3624
+ # words, or doublewords; corresponding elements of the source and
3625
+ # destination are compared, and the corresponding element of the
3626
+ # destination (first) operand is set to all zeros or all ones depending on
3627
+ # the result of the comparison.
3628
+
3629
+ # |PCMPxxB| treats the operands as vectors of eight bytes, |PCMPxxW|
3630
+ # treats them as vectors of four words, and |PCMPxxD| as two doublewords.
3631
+
3632
+ # |PCMPEQx| sets the corresponding element of the destination operand to
3633
+ # all ones if the two elements compared are equal; |PCMPGTx| sets the
3634
+ # destination element to all ones if the element of the first
3635
+ # (destination) operand is greater (treated as a signed integer) than that
3636
+ # of the second (source) operand.
3637
+
3638
+
3639
+ # A.119 |PDISTIB|: MMX Packed Distance and Accumulate with Implied
3640
+ # Register
3641
+
3642
+ # PDISTIB mmxreg,mem64 ; 0F 54 /r [CYRIX,MMX]
3643
+
3644
+ # |PDISTIB|, specific to the Cyrix MMX extensions, treats its two input
3645
+ # operands as vectors of eight unsigned bytes. For each byte position, it
3646
+ # finds the absolute difference between the bytes in that position in the
3647
+ # two input operands, and adds that value to the byte in the same position
3648
+ # in the implied output register. The addition is saturated to an unsigned
3649
+ # byte in the same way as |PADDUSB|.
3650
+
3651
+ # The implied output register is found in the same way as |PADDSIW|
3652
+ # (section A.115 <#section-A.115>).
3653
+
3654
+ # Note that |PDISTIB| cannot take a register as its second source operand.
3655
+
3656
+
3657
+ # A.120 |PMACHRIW|: MMX Packed Multiply and Accumulate with Rounding
3658
+
3659
+ # PMACHRIW mmxreg,mem64 ; 0F 5E /r [CYRIX,MMX]
3660
+
3661
+ # |PMACHRIW| acts almost identically to |PMULHRIW| (section A.123
3662
+ # <#section-A.123>), but instead of /storing/ its result in the implied
3663
+ # destination register, it /adds/ its result, as four packed words, to the
3664
+ # implied destination register. No saturation is done: the addition can
3665
+ # wrap around.
3666
+
3667
+ # Note that |PMACHRIW| cannot take a register as its second source operand.
3668
+
3669
+
3670
+ # A.121 |PMADDWD|: MMX Packed Multiply and Add
3671
+
3672
+ # PMADDWD mmxreg,r/m64 ; 0F F5 /r [PENT,MMX]
3673
+
3674
+ # |PMADDWD| treats its two inputs as vectors of four signed words. It
3675
+ # multiplies corresponding elements of the two operands, giving four
3676
+ # signed doubleword results. The top two of these are added and placed in
3677
+ # the top 32 bits of the destination (first) operand; the bottom two are
3678
+ # added and placed in the bottom 32 bits.
3679
+
3680
+
3681
+ # A.122 |PMAGW|: MMX Packed Magnitude
3682
+
3683
+ # PMAGW mmxreg,r/m64 ; 0F 52 /r [CYRIX,MMX]
3684
+
3685
+ # |PMAGW|, specific to the Cyrix MMX extensions, treats both its operands
3686
+ # as vectors of four signed words. It compares the absolute values of the
3687
+ # words in corresponding positions, and sets each word of the destination
3688
+ # (first) operand to whichever of the two words in that position had the
3689
+ # larger absolute value.
3690
+
3691
+
3692
+ # A.123 |PMULHRW|, |PMULHRIW|: MMX Packed Multiply High with Rounding
3693
+
3694
+ # PMULHRW mmxreg,r/m64 ; 0F 59 /r [CYRIX,MMX]
3695
+ # PMULHRIW mmxreg,r/m64 ; 0F 5D /r [CYRIX,MMX]
3696
+
3697
+ # These instructions, specific to the Cyrix MMX extensions, treat their
3698
+ # operands as vectors of four signed words. Words in corresponding
3699
+ # positions are multiplied, to give a 32-bit value in which bits 30 and 31
3700
+ # are guaranteed equal. Bits 30 to 15 of this value (bit mask
3701
+ # |0x7FFF8000|) are taken and stored in the corresponding position of the
3702
+ # destination operand, after first rounding the low bit (equivalent to
3703
+ # adding |0x4000| before extracting bits 30 to 15).
3704
+
3705
+ # For |PMULHRW|, the destination operand is the first operand; for
3706
+ # |PMULHRIW| the destination operand is implied by the first operand in
3707
+ # the manner of |PADDSIW| (section A.115 <#section-A.115>).
3708
+
3709
+
3710
+ # A.124 |PMULHW|, |PMULLW|: MMX Packed Multiply
3711
+
3712
+ # PMULHW mmxreg,r/m64 ; 0F E5 /r [PENT,MMX]
3713
+ # PMULLW mmxreg,r/m64 ; 0F D5 /r [PENT,MMX]
3714
+
3715
+ # |PMULxW| treats its two inputs as vectors of four signed words. It
3716
+ # multiplies corresponding elements of the two operands, giving four
3717
+ # signed doubleword results.
3718
+
3719
+ # |PMULHW| then stores the top 16 bits of each doubleword in the
3720
+ # destination (first) operand; |PMULLW| stores the bottom 16 bits of each
3721
+ # doubleword in the destination operand.
3722
+
3723
+
3724
+ # A.125 |PMVccZB|: MMX Packed Conditional Move
3725
+
3726
+ # PMVZB mmxreg,mem64 ; 0F 58 /r [CYRIX,MMX]
3727
+ # PMVNZB mmxreg,mem64 ; 0F 5A /r [CYRIX,MMX]
3728
+ # PMVLZB mmxreg,mem64 ; 0F 5B /r [CYRIX,MMX]
3729
+ # PMVGEZB mmxreg,mem64 ; 0F 5C /r [CYRIX,MMX]
3730
+
3731
+ # These instructions, specific to the Cyrix MMX extensions, perform
3732
+ # parallel conditional moves. The two input operands are treated as
3733
+ # vectors of eight bytes. Each byte of the destination (first) operand is
3734
+ # either written from the corresponding byte of the source (second)
3735
+ # operand, or left alone, depending on the value of the byte in the
3736
+ # /implied/ operand (specified in the same way as |PADDSIW|, in section
3737
+ # A.115 <#section-A.115>).
3738
+
3739
+ # |PMVZB| performs each move if the corresponding byte in the implied
3740
+ # operand is zero. |PMVNZB| moves if the byte is non-zero. |PMVLZB| moves
3741
+ # if the byte is less than zero, and |PMVGEZB| moves if the byte is
3742
+ # greater than or equal to zero.
3743
+
3744
+ # Note that these instructions cannot take a register as their second
3745
+ # source operand.
3746
+
3747
+
3748
+ # A.126 |POP|: Pop Data from Stack
3749
+
3750
+ # POP reg16 ; o16 58+r [8086]
3751
+ # POP reg32 ; o32 58+r [386]
3752
+
3753
+ # POP r/m16 ; o16 8F /0 [8086]
3754
+ # POP r/m32 ; o32 8F /0 [386]
3755
+
3756
+ # POP CS ; 0F [8086,UNDOC]
3757
+ # POP DS ; 1F [8086]
3758
+ # POP ES ; 07 [8086]
3759
+ # POP SS ; 17 [8086]
3760
+ # POP FS ; 0F A1 [386]
3761
+ # POP GS ; 0F A9 [386]
3762
+
3763
+ # |POP| loads a value from the stack (from |[SS:SP]| or |[SS:ESP]|) and
3764
+ # then increments the stack pointer.
3765
+
3766
+ # The address-size attribute of the instruction determines whether |SP| or
3767
+ # |ESP| is used as the stack pointer: to deliberately override the default
3768
+ # given by the |BITS| setting, you can use an |a16| or |a32| prefix.
3769
+
3770
+ # The operand-size attribute of the instruction determines whether the
3771
+ # stack pointer is incremented by 2 or 4: this means that segment register
3772
+ # pops in |BITS 32| mode will pop 4 bytes off the stack and discard the
3773
+ # upper two of them. If you need to override that, you can use an |o16| or
3774
+ # |o32| prefix.
3775
+
3776
+ # The above opcode listings give two forms for general-purpose register
3777
+ # pop instructions: for example, |POP BX| has the two forms |5B| and |8F
3778
+ # C3|. NASM will always generate the shorter form when given |POP BX|.
3779
+ # NDISASM will disassemble both.
3780
+
3781
+ # |POP CS| is not a documented instruction, and is not supported on any
3782
+ # processor above the 8086 (since they use |0Fh| as an opcode prefix for
3783
+ # instruction set extensions). However, at least some 8086 processors do
3784
+ # support it, and so NASM generates it for completeness.
3785
+
3786
+
3787
+ # A.127 |POPAx|: Pop All General-Purpose Registers
3788
+
3789
+ # POPA ; 61 [186]
3790
+ # POPAW ; o16 61 [186]
3791
+ # POPAD ; o32 61 [386]
3792
+
3793
+ # |POPAW| pops a word from the stack into each of, successively, |DI|,
3794
+ # |SI|, |BP|, nothing (it discards a word from the stack which was a
3795
+ # placeholder for |SP|), |BX|, |DX|, |CX| and |AX|. It is intended to
3796
+ # reverse the operation of |PUSHAW| (see section A.135 <#section-A.135>),
3797
+ # but it ignores the value for |SP| that was pushed on the stack by |PUSHAW|.
3798
+
3799
+ # |POPAD| pops twice as much data, and places the results in |EDI|, |ESI|,
3800
+ # |EBP|, nothing (placeholder for |ESP|), |EBX|, |EDX|, |ECX| and |EAX|.
3801
+ # It reverses the operation of |PUSHAD|.
3802
+
3803
+ # |POPA| is an alias mnemonic for either |POPAW| or |POPAD|, depending on
3804
+ # the current |BITS| setting.
3805
+
3806
+ # Note that the registers are popped in reverse order of their numeric
3807
+ # values in opcodes (see section A.2.1 <#section-A.2.1>).
3808
+
3809
+
3810
+ # A.128 |POPFx|: Pop Flags Register
3811
+
3812
+ # POPF ; 9D [186]
3813
+ # POPFW ; o16 9D [186]
3814
+ # POPFD ; o32 9D [386]
3815
+
3816
+ # |POPFW| pops a word from the stack and stores it in the bottom 16 bits
3817
+ # of the flags register (or the whole flags register, on processors below
3818
+ # a 386). |POPFD| pops a doubleword and stores it in the entire flags
3819
+ # register.
3820
+
3821
+ # |POPF| is an alias mnemonic for either |POPFW| or |POPFD|, depending on
3822
+ # the current |BITS| setting.
3823
+
3824
+ # See also |PUSHF| (section A.136 <#section-A.136>).
3825
+
3826
+
3827
+ # A.129 |POR|: MMX Bitwise OR
3828
+
3829
+ # POR mmxreg,r/m64 ; 0F EB /r [PENT,MMX]
3830
+
3831
+ # |POR| performs a bitwise OR operation between its two operands (i.e.
3832
+ # each bit of the result is 1 if and only if at least one of the
3833
+ # corresponding bits of the two inputs was 1), and stores the result in
3834
+ # the destination (first) operand.
3835
+
3836
+
3837
+ # A.130 |PSLLx|, |PSRLx|, |PSRAx|: MMX Bit Shifts
3838
+
3839
+ # PSLLW mmxreg,r/m64 ; 0F F1 /r [PENT,MMX]
3840
+ # PSLLW mmxreg,imm8 ; 0F 71 /6 ib [PENT,MMX]
3841
+
3842
+ # PSLLD mmxreg,r/m64 ; 0F F2 /r [PENT,MMX]
3843
+ # PSLLD mmxreg,imm8 ; 0F 72 /6 ib [PENT,MMX]
3844
+
3845
+ # PSLLQ mmxreg,r/m64 ; 0F F3 /r [PENT,MMX]
3846
+ # PSLLQ mmxreg,imm8 ; 0F 73 /6 ib [PENT,MMX]
3847
+
3848
+ # PSRAW mmxreg,r/m64 ; 0F E1 /r [PENT,MMX]
3849
+ # PSRAW mmxreg,imm8 ; 0F 71 /4 ib [PENT,MMX]
3850
+
3851
+ # PSRAD mmxreg,r/m64 ; 0F E2 /r [PENT,MMX]
3852
+ # PSRAD mmxreg,imm8 ; 0F 72 /4 ib [PENT,MMX]
3853
+
3854
+ # PSRLW mmxreg,r/m64 ; 0F D1 /r [PENT,MMX]
3855
+ # PSRLW mmxreg,imm8 ; 0F 71 /2 ib [PENT,MMX]
3856
+
3857
+ # PSRLD mmxreg,r/m64 ; 0F D2 /r [PENT,MMX]
3858
+ # PSRLD mmxreg,imm8 ; 0F 72 /2 ib [PENT,MMX]
3859
+
3860
+ # PSRLQ mmxreg,r/m64 ; 0F D3 /r [PENT,MMX]
3861
+ # PSRLQ mmxreg,imm8 ; 0F 73 /2 ib [PENT,MMX]
3862
+
3863
+ # |PSxxQ| perform simple bit shifts on the 64-bit MMX registers: the
3864
+ # destination (first) operand is shifted left or right by the number of
3865
+ # bits given in the source (second) operand, and the vacated bits are
3866
+ # filled in with zeros (for a logical shift) or copies of the original
3867
+ # sign bit (for an arithmetic right shift).
3868
+
3869
+ # |PSxxW| and |PSxxD| perform packed bit shifts: the destination operand
3870
+ # is treated as a vector of four words or two doublewords, and each
3871
+ # element is shifted individually, so bits shifted out of one element do
3872
+ # not interfere with empty bits coming into the next.
3873
+
3874
+ # |PSLLx| and |PSRLx| perform logical shifts: the vacated bits at one end
3875
+ # of the shifted number are filled with zeros. |PSRAx| performs an
3876
+ # arithmetic right shift: the vacated bits at the top of the shifted
3877
+ # number are filled with copies of the original top (sign) bit.
3878
+
3879
+
3880
+ # A.131 |PSUBxx|: MMX Packed Subtraction
3881
+
3882
+ # PSUBB mmxreg,r/m64 ; 0F F8 /r [PENT,MMX]
3883
+ # PSUBW mmxreg,r/m64 ; 0F F9 /r [PENT,MMX]
3884
+ # PSUBD mmxreg,r/m64 ; 0F FA /r [PENT,MMX]
3885
+
3886
+ # PSUBSB mmxreg,r/m64 ; 0F E8 /r [PENT,MMX]
3887
+ # PSUBSW mmxreg,r/m64 ; 0F E9 /r [PENT,MMX]
3888
+
3889
+ # PSUBUSB mmxreg,r/m64 ; 0F D8 /r [PENT,MMX]
3890
+ # PSUBUSW mmxreg,r/m64 ; 0F D9 /r [PENT,MMX]
3891
+
3892
+ # |PSUBxx| all perform packed subtraction between their two 64-bit
3893
+ # operands, storing the result in the destination (first) operand. The
3894
+ # |PSUBxB| forms treat the 64-bit operands as vectors of eight bytes, and
3895
+ # subtract each byte individually; |PSUBxW| treat the operands as vectors
3896
+ # of four words; and |PSUBD| treats its operands as vectors of two
3897
+ # doublewords.
3898
+
3899
+ # In all cases, the elements of the operand on the right are subtracted
3900
+ # from the corresponding elements of the operand on the left, not the
3901
+ # other way round.
3902
+
3903
+ # |PSUBSB| and |PSUBSW| perform signed saturation on the difference of each pair
3904
+ # of bytes or words: if the result of a subtraction is too large or too
3905
+ # small to fit into a signed byte or word result, it is clipped
3906
+ # (saturated) to the largest or smallest value which /will/ fit. |PSUBUSB|
3907
+ # and |PSUBUSW| similarly perform unsigned saturation, clipping to zero
3908
+ # if the result of the subtraction would otherwise be negative.
3909
+
3910
+
3911
+ # A.132 |PSUBSIW|: MMX Packed Subtract with Saturation to Implied
3912
+ # Destination
3913
+
3914
+ # PSUBSIW mmxreg,r/m64 ; 0F 55 /r [CYRIX,MMX]
3915
+
3916
+ # |PSUBSIW|, specific to the Cyrix extensions to the MMX instruction set,
3917
+ # performs the same function as |PSUBSW|, except that the result is not
3918
+ # placed in the register specified by the first operand, but instead in
3919
+ # the implied destination register, specified as for |PADDSIW| (section
3920
+ # A.115 <#section-A.115>).
3921
+
3922
+
3923
+ # A.133 |PUNPCKxxx|: Unpack Data
3924
+
3925
+ # PUNPCKHBW mmxreg,r/m64 ; 0F 68 /r [PENT,MMX]
3926
+ # PUNPCKHWD mmxreg,r/m64 ; 0F 69 /r [PENT,MMX]
3927
+ # PUNPCKHDQ mmxreg,r/m64 ; 0F 6A /r [PENT,MMX]
3928
+
3929
+ # PUNPCKLBW mmxreg,r/m64 ; 0F 60 /r [PENT,MMX]
3930
+ # PUNPCKLWD mmxreg,r/m64 ; 0F 61 /r [PENT,MMX]
3931
+ # PUNPCKLDQ mmxreg,r/m64 ; 0F 62 /r [PENT,MMX]
3932
+
3933
+ # |PUNPCKxx| all treat their operands as vectors, and produce a new vector
3934
+ # generated by interleaving elements from the two inputs. The |PUNPCKHxx|
3935
+ # instructions start by throwing away the bottom half of each input
3936
+ # operand, and the |PUNPCKLxx| instructions throw away the top half.
3937
+
3938
+ # The remaining elements, totalling 64 bits, are then interleaved into the
3939
+ # destination, alternating elements from the second (source) operand and
3940
+ # the first (destination) operand: so the leftmost element in the result
3941
+ # always comes from the second operand, and the rightmost from the
3942
+ # destination.
3943
+
3944
+ # |PUNPCKxBW| works a byte at a time, |PUNPCKxWD| a word at a time, and
3945
+ # |PUNPCKxDQ| a doubleword at a time.
3946
+
3947
+ # So, for example, if the first operand held |0x7A6A5A4A3A2A1A0A| and the
3948
+ # second held |0x7B6B5B4B3B2B1B0B|, then:
3949
+
3950
+ # * |PUNPCKHBW| would return |0x7B7A6B6A5B5A4B4A|.
3951
+ # * |PUNPCKHWD| would return |0x7B6B7A6A5B4B5A4A|.
3952
+ # * |PUNPCKHDQ| would return |0x7B6B5B4B7A6A5A4A|.
3953
+ # * |PUNPCKLBW| would return |0x3B3A2B2A1B1A0B0A|.
3954
+ # * |PUNPCKLWD| would return |0x3B2B3A2A1B0B1A0A|.
3955
+ # * |PUNPCKLDQ| would return |0x3B2B1B0B3A2A1A0A|.
3956
+
3957
+
3958
+ # A.134 |PUSH|: Push Data on Stack
3959
+
3960
+ # PUSH reg16 ; o16 50+r [8086]
3961
+ # PUSH reg32 ; o32 50+r [386]
3962
+
3963
+ # PUSH r/m16 ; o16 FF /6 [8086]
3964
+ # PUSH r/m32 ; o32 FF /6 [386]
3965
+
3966
+ # PUSH CS ; 0E [8086]
3967
+ # PUSH DS ; 1E [8086]
3968
+ # PUSH ES ; 06 [8086]
3969
+ # PUSH SS ; 16 [8086]
3970
+ # PUSH FS ; 0F A0 [386]
3971
+ # PUSH GS ; 0F A8 [386]
3972
+
3973
+ # PUSH imm8 ; 6A ib [286]
3974
+ # PUSH imm16 ; o16 68 iw [286]
3975
+ # PUSH imm32 ; o32 68 id [386]
3976
+
3977
+ # |PUSH| decrements the stack pointer (|SP| or |ESP|) by 2 or 4, and then
3978
+ # stores the given value at |[SS:SP]| or |[SS:ESP]|.
3979
+
3980
+ # The address-size attribute of the instruction determines whether |SP| or
3981
+ # |ESP| is used as the stack pointer: to deliberately override the default
3982
+ # given by the |BITS| setting, you can use an |a16| or |a32| prefix.
3983
+
3984
+ # The operand-size attribute of the instruction determines whether the
3985
+ # stack pointer is decremented by 2 or 4: this means that segment register
3986
+ # pushes in |BITS 32| mode will push 4 bytes on the stack, of which the
3987
+ # upper two are undefined. If you need to override that, you can use an
3988
+ # |o16| or |o32| prefix.
3989
+
3990
+ # The above opcode listings give two forms for general-purpose register
3991
+ # push instructions: for example, |PUSH BX| has the two forms |53| and |FF
3992
+ # F3|. NASM will always generate the shorter form when given |PUSH BX|.
3993
+ # NDISASM will disassemble both.
3994
+
3995
+ # Unlike the undocumented and barely supported |POP CS|, |PUSH CS| is a
3996
+ # perfectly valid and sensible instruction, supported on all processors.
3997
+
3998
+ # The instruction |PUSH SP| may be used to distinguish an 8086 from later
3999
+ # processors: on an 8086, the value of |SP| stored is the value it has
4000
+ # /after/ the push instruction, whereas on later processors it is the
4001
+ # value /before/ the push instruction.
4002
+
4003
+
4004
+ # A.135 |PUSHAx|: Push All General-Purpose Registers
4005
+
4006
+ # PUSHA ; 60 [186]
4007
+ # PUSHAD ; o32 60 [386]
4008
+ # PUSHAW ; o16 60 [186]
4009
+
4010
+ # |PUSHAW| pushes, in succession, |AX|, |CX|, |DX|, |BX|, |SP|, |BP|, |SI|
4011
+ # and |DI| on the stack, decrementing the stack pointer by a total of 16.
4012
+
4013
+ # |PUSHAD| pushes, in succession, |EAX|, |ECX|, |EDX|, |EBX|, |ESP|,
4014
+ # |EBP|, |ESI| and |EDI| on the stack, decrementing the stack pointer by a
4015
+ # total of 32.
4016
+
4017
+ # In both cases, the value of |SP| or |ESP| pushed is its /original/
4018
+ # value, as it had before the instruction was executed.
4019
+
4020
+ # |PUSHA| is an alias mnemonic for either |PUSHAW| or |PUSHAD|, depending
4021
+ # on the current |BITS| setting.
4022
+
4023
+ # Note that the registers are pushed in order of their numeric values in
4024
+ # opcodes (see section A.2.1 <#section-A.2.1>).
4025
+
4026
+ # See also |POPA| (section A.127 <#section-A.127>).
4027
+
4028
+
4029
+ # A.136 |PUSHFx|: Push Flags Register
4030
+
4031
+ # PUSHF ; 9C [8086]
4032
+ # PUSHFD ; o32 9C [386]
4033
+ # PUSHFW ; o16 9C [8086]
4034
+
4035
+ # |PUSHFW| pushes the bottom 16 bits of the flags register (or the whole
4036
+ # flags register, on processors below a 386) onto the stack. |PUSHFD|
4037
+ # pushes the entire flags register onto the stack as a doubleword
4038
+ # instead.
4039
+
4040
+ # |PUSHF| is an alias mnemonic for either |PUSHFW| or |PUSHFD|, depending
4041
+ # on the current |BITS| setting.
4042
+
4043
+ # See also |POPF| (section A.128 <#section-A.128>).
4044
+
4045
+
4046
+ # A.137 |PXOR|: MMX Bitwise XOR
4047
+
4048
+ # PXOR mmxreg,r/m64 ; 0F EF /r [PENT,MMX]
4049
+
4050
+ # |PXOR| performs a bitwise XOR operation between its two operands (i.e.
4051
+ # each bit of the result is 1 if and only if exactly one of the
4052
+ # corresponding bits of the two inputs was 1), and stores the result in
4053
+ # the destination (first) operand.
4054
+
4055
+
4056
+ # A.138 |RCL|, |RCR|: Bitwise Rotate through Carry Bit
4057
+
4058
+ # RCL r/m8,1 ; D0 /2 [8086]
4059
+ # RCL r/m8,CL ; D2 /2 [8086]
4060
+ # RCL r/m8,imm8 ; C0 /2 ib [286]
4061
+ # RCL r/m16,1 ; o16 D1 /2 [8086]
4062
+ # RCL r/m16,CL ; o16 D3 /2 [8086]
4063
+ # RCL r/m16,imm8 ; o16 C1 /2 ib [286]
4064
+ # RCL r/m32,1 ; o32 D1 /2 [386]
4065
+ # RCL r/m32,CL ; o32 D3 /2 [386]
4066
+ # RCL r/m32,imm8 ; o32 C1 /2 ib [386]
4067
+
4068
+ # RCR r/m8,1 ; D0 /3 [8086]
4069
+ # RCR r/m8,CL ; D2 /3 [8086]
4070
+ # RCR r/m8,imm8 ; C0 /3 ib [286]
4071
+ # RCR r/m16,1 ; o16 D1 /3 [8086]
4072
+ # RCR r/m16,CL ; o16 D3 /3 [8086]
4073
+ # RCR r/m16,imm8 ; o16 C1 /3 ib [286]
4074
+ # RCR r/m32,1 ; o32 D1 /3 [386]
4075
+ # RCR r/m32,CL ; o32 D3 /3 [386]
4076
+ # RCR r/m32,imm8 ; o32 C1 /3 ib [386]
4077
+
4078
+ # |RCL| and |RCR| perform a 9-bit, 17-bit or 33-bit bitwise rotation
4079
+ # operation, involving the given source/destination (first) operand and
4080
+ # the carry bit. Thus, for example, in the operation |RCL AL,1|, a 9-bit
4081
+ # rotation is performed in which |AL| is shifted left by 1, the top bit of
4082
+ # |AL| moves into the carry flag, and the original value of the carry flag
4083
+ # is placed in the low bit of |AL|.
4084
+
4085
+ # The number of bits to rotate by is given by the second operand. Only the
4086
+ # bottom five bits of the rotation count are considered by processors
4087
+ # above the 8086.
4088
+
4089
+ # You can force the longer (286 and upwards, beginning with a |C1| byte)
4090
+ # form of |RCL foo,1| by using a |BYTE| prefix: |RCL foo,BYTE 1|.
4091
+ # Similarly with |RCR|.
4092
+
4093
+
4094
+ # A.139 |RDMSR|: Read Model-Specific Registers
4095
+
4096
+ # RDMSR ; 0F 32 [PENT]
4097
+
4098
+ # |RDMSR| reads the processor Model-Specific Register (MSR) whose index is
4099
+ # stored in |ECX|, and stores the result in |EDX:EAX|. See also |WRMSR|
4100
+ # (section A.165 <#section-A.165>).
4101
+
4102
+
4103
+ # A.140 |RDPMC|: Read Performance-Monitoring Counters
4104
+
4105
+ # RDPMC ; 0F 33 [P6]
4106
+
4107
+ # |RDPMC| reads the processor performance-monitoring counter whose index
4108
+ # is stored in |ECX|, and stores the result in |EDX:EAX|.
4109
+
4110
+
4111
+ # A.141 |RDTSC|: Read Time-Stamp Counter
4112
+
4113
+ # RDTSC ; 0F 31 [PENT]
4114
+
4115
+ # |RDTSC| reads the processor's time-stamp counter into |EDX:EAX|.
4116
+
4117
+
4118
+ # A.142 |RET|, |RETF|, |RETN|: Return from Procedure Call
4119
+
4120
+ # RET ; C3 [8086]
4121
+ # RET imm16 ; C2 iw [8086]
4122
+
4123
+ # RETF ; CB [8086]
4124
+ # RETF imm16 ; CA iw [8086]
4125
+
4126
+ # RETN ; C3 [8086]
4127
+ # RETN imm16 ; C2 iw [8086]
4128
+
4129
+ # |RET|, and its exact synonym |RETN|, pop |IP| or |EIP| from the stack
4130
+ # and transfer control to the new address. Optionally, if a numeric
4131
+ # operand is provided, they increment the stack pointer by a further
4132
+ # |imm16| bytes after popping the return address.
4133
+
4134
+ # |RETF| executes a far return: after popping |IP|/|EIP|, it then pops
4135
+ # |CS|, and /then/ increments the stack pointer by the optional argument
4136
+ # if present.
4137
+
4138
+
4139
+ # A.143 |ROL|, |ROR|: Bitwise Rotate
4140
+
4141
+ # ROL r/m8,1 ; D0 /0 [8086]
4142
+ # ROL r/m8,CL ; D2 /0 [8086]
4143
+ # ROL r/m8,imm8 ; C0 /0 ib [286]
4144
+ # ROL r/m16,1 ; o16 D1 /0 [8086]
4145
+ # ROL r/m16,CL ; o16 D3 /0 [8086]
4146
+ # ROL r/m16,imm8 ; o16 C1 /0 ib [286]
4147
+ # ROL r/m32,1 ; o32 D1 /0 [386]
4148
+ # ROL r/m32,CL ; o32 D3 /0 [386]
4149
+ # ROL r/m32,imm8 ; o32 C1 /0 ib [386]
4150
+
4151
+ # ROR r/m8,1 ; D0 /1 [8086]
4152
+ # ROR r/m8,CL ; D2 /1 [8086]
4153
+ # ROR r/m8,imm8 ; C0 /1 ib [286]
4154
+ # ROR r/m16,1 ; o16 D1 /1 [8086]
4155
+ # ROR r/m16,CL ; o16 D3 /1 [8086]
4156
+ # ROR r/m16,imm8 ; o16 C1 /1 ib [286]
4157
+ # ROR r/m32,1 ; o32 D1 /1 [386]
4158
+ # ROR r/m32,CL ; o32 D3 /1 [386]
4159
+ # ROR r/m32,imm8 ; o32 C1 /1 ib [386]
4160
+
4161
+ # |ROL| and |ROR| perform a bitwise rotation operation on the given
4162
+ # source/destination (first) operand. Thus, for example, in the operation
4163
+ # |ROL AL,1|, an 8-bit rotation is performed in which |AL| is shifted left
4164
+ # by 1 and the original top bit of |AL| moves round into the low bit.
4165
+
4166
+ # The number of bits to rotate by is given by the second operand. Only the
4167
+ # bottom 3, 4 or 5 bits (depending on the source operand size) of the
4168
+ # rotation count are considered by processors above the 8086.
4169
+
4170
+ # You can force the longer (286 and upwards, beginning with a |C1| byte)
4171
+ # form of |ROL foo,1| by using a |BYTE| prefix: |ROL foo,BYTE 1|.
4172
+ # Similarly with |ROR|.
4173
+
4174
+
4175
+ # A.144 |RSM|: Resume from System-Management Mode
4176
+
4177
+ # RSM ; 0F AA [PENT]
4178
+
4179
+ # |RSM| returns the processor to its normal operating mode when it was in
4180
+ # System-Management Mode.
4181
+
4182
+
4183
+ # A.145 |SAHF|: Store AH to Flags
4184
+
4185
+ # SAHF ; 9E [8086]
4186
+
4187
+ # |SAHF| sets the low byte of the flags word according to the contents of
4188
+ # the |AH| register. See also |LAHF| (section A.90 <#section-A.90>).
4189
+
4190
+
4191
+ # A.146 |SAL|, |SAR|: Bitwise Arithmetic Shifts
4192
+
4193
+ # SAL r/m8,1 ; D0 /4 [8086]
4194
+ # SAL r/m8,CL ; D2 /4 [8086]
4195
+ # SAL r/m8,imm8 ; C0 /4 ib [286]
4196
+ # SAL r/m16,1 ; o16 D1 /4 [8086]
4197
+ # SAL r/m16,CL ; o16 D3 /4 [8086]
4198
+ # SAL r/m16,imm8 ; o16 C1 /4 ib [286]
4199
+ # SAL r/m32,1 ; o32 D1 /4 [386]
4200
+ # SAL r/m32,CL ; o32 D3 /4 [386]
4201
+ # SAL r/m32,imm8 ; o32 C1 /4 ib [386]
4202
+
4203
+ # SAR r/m8,1 ; D0 /7 [8086]
4204
+ # SAR r/m8,CL ; D2 /7 [8086]
4205
+ # SAR r/m8,imm8 ; C0 /7 ib [286]
4206
+ # SAR r/m16,1 ; o16 D1 /7 [8086]
4207
+ # SAR r/m16,CL ; o16 D3 /7 [8086]
4208
+ # SAR r/m16,imm8 ; o16 C1 /7 ib [286]
4209
+ # SAR r/m32,1 ; o32 D1 /7 [386]
4210
+ # SAR r/m32,CL ; o32 D3 /7 [386]
4211
+ # SAR r/m32,imm8 ; o32 C1 /7 ib [386]
4212
+
4213
+ # |SAL| and |SAR| perform an arithmetic shift operation on the given
4214
+ # source/destination (first) operand. The vacated bits are filled with
4215
+ # zero for |SAL|, and with copies of the original high bit of the source
4216
+ # operand for |SAR|.
4217
+
4218
+ # |SAL| is a synonym for |SHL| (see section A.152 <#section-A.152>). NASM
4219
+ # will assemble either one to the same code, but NDISASM will always
4220
+ # disassemble that code as |SHL|.
4221
+
4222
+ # The number of bits to shift by is given by the second operand. Only the
4223
+ # bottom 3, 4 or 5 bits (depending on the source operand size) of the
4224
+ # shift count are considered by processors above the 8086.
4225
+
4226
+ # You can force the longer (286 and upwards, beginning with a |C1| byte)
4227
+ # form of |SAL foo,1| by using a |BYTE| prefix: |SAL foo,BYTE 1|.
4228
+ # Similarly with |SAR|.
4229
+
4230
+
4231
+ # A.147 |SALC|: Set AL from Carry Flag
4232
+
4233
+ # SALC ; D6 [8086,UNDOC]
4234
+
4235
+ # |SALC| is an early undocumented instruction similar in concept to
4236
+ # |SETcc| (section A.150 <#section-A.150>). Its function is to set |AL| to
4237
+ # zero if the carry flag is clear, or to |0xFF| if it is set.
4238
+
4239
+
4240
+ # A.148 |SBB|: Subtract with Borrow
4241
+
4242
+ # SBB r/m8,reg8 ; 18 /r [8086]
4243
+ # SBB r/m16,reg16 ; o16 19 /r [8086]
4244
+ # SBB r/m32,reg32 ; o32 19 /r [386]
4245
+
4246
+ # SBB reg8,r/m8 ; 1A /r [8086]
4247
+ # SBB reg16,r/m16 ; o16 1B /r [8086]
4248
+ # SBB reg32,r/m32 ; o32 1B /r [386]
4249
+
4250
+ # SBB r/m8,imm8 ; 80 /3 ib [8086]
4251
+ # SBB r/m16,imm16 ; o16 81 /3 iw [8086]
4252
+ # SBB r/m32,imm32 ; o32 81 /3 id [386]
4253
+
4254
+ # SBB r/m16,imm8 ; o16 83 /3 ib [8086]
4255
+ # SBB r/m32,imm8 ; o32 83 /3 ib [386]
4256
+
4257
+ # SBB AL,imm8 ; 1C ib [8086]
4258
+ # SBB AX,imm16 ; o16 1D iw [8086]
4259
+ # SBB EAX,imm32 ; o32 1D id [386]
4260
+
4261
+ # |SBB| performs integer subtraction: it subtracts its second operand,
4262
+ # plus the value of the carry flag, from its first, and leaves the result
4263
+ # in its destination (first) operand. The flags are set according to the
4264
+ # result of the operation: in particular, the carry flag is affected and
4265
+ # can be used by a subsequent |SBB| instruction.
4266
+
4267
+ # In the forms with an 8-bit immediate second operand and a longer first
4268
+ # operand, the second operand is considered to be signed, and is
4269
+ # sign-extended to the length of the first operand. In these cases, the
4270
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
4271
+ # instruction.
4272
+
4273
+ # To subtract one number from another without also subtracting the
4274
+ # contents of the carry flag, use |SUB| (section A.159 <#section-A.159>).
4275
+
4276
+
4277
+ # A.149 |SCASB|, |SCASW|, |SCASD|: Scan String
4278
+
4279
+ # SCASB ; AE [8086]
4280
+ # SCASW ; o16 AF [8086]
4281
+ # SCASD ; o32 AF [386]
4282
+
4283
+ # |SCASB| compares the byte in |AL| with the byte at |[ES:DI]| or
4284
+ # |[ES:EDI]|, and sets the flags accordingly. It then increments or
4285
+ # decrements (depending on the direction flag: increments if the flag is
4286
+ # clear, decrements if it is set) |DI| (or |EDI|).
4287
+
4288
+ # The register used is |DI| if the address size is 16 bits, and |EDI| if
4289
+ # it is 32 bits. If you need to use an address size not equal to the
4290
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
4291
+
4292
+ # Segment override prefixes have no effect for this instruction: the use
4293
+ # of |ES| for the load from |[DI]| or |[EDI]| cannot be overridden.
4294
+
4295
+ # |SCASW| and |SCASD| work in the same way, but they compare a word to
4296
+ # |AX| or a doubleword to |EAX| instead of a byte to |AL|, and increment
4297
+ # or decrement the addressing registers by 2 or 4 instead of 1.
4298
+
4299
+ # The |REPE| and |REPNE| prefixes (equivalently, |REPZ| and |REPNZ|) may
4300
+ # be used to repeat the instruction up to |CX| (or |ECX| - again, the
4301
+ # address size chooses which) times until the first unequal or equal byte
4302
+ # is found.
4303
+
4304
+
4305
+ # A.150 |SETcc|: Set Register from Condition
4306
+
4307
+ # SETcc r/m8 ; 0F 90+cc /2 [386]
4308
+
4309
+ # |SETcc| sets the given 8-bit operand to zero if its condition is not
4310
+ # satisfied, and to 1 if it is.
4311
+
4312
+
4313
+ # A.151 |SGDT|, |SIDT|, |SLDT|: Store Descriptor Table Pointers
4314
+
4315
+ # SGDT mem ; 0F 01 /0 [286,PRIV]
4316
+ # SIDT mem ; 0F 01 /1 [286,PRIV]
4317
+ # SLDT r/m16 ; 0F 00 /0 [286,PRIV]
4318
+
4319
+ # |SGDT| and |SIDT| both take a 6-byte memory area as an operand: they
4320
+ # store the contents of the GDTR (global descriptor table register) or
4321
+ # IDTR (interrupt descriptor table register) into that area as a 32-bit
4322
+ # linear address and a 16-bit size limit (in that order).
4323
+ # These are the only instructions which directly use /linear/ addresses,
4324
+ # rather than segment/offset pairs.
4325
+
4326
+ # |SLDT| stores the segment selector corresponding to the LDT (local
4327
+ # descriptor table) into the given operand.
4328
+
4329
+ # See also |LGDT|, |LIDT| and |LLDT| (section A.95 <#section-A.95>).
4330
+
4331
+
4332
+ # A.152 |SHL|, |SHR|: Bitwise Logical Shifts
4333
+
4334
+ # SHL r/m8,1 ; D0 /4 [8086]
4335
+ # SHL r/m8,CL ; D2 /4 [8086]
4336
+ # SHL r/m8,imm8 ; C0 /4 ib [286]
4337
+ # SHL r/m16,1 ; o16 D1 /4 [8086]
4338
+ # SHL r/m16,CL ; o16 D3 /4 [8086]
4339
+ # SHL r/m16,imm8 ; o16 C1 /4 ib [286]
4340
+ # SHL r/m32,1 ; o32 D1 /4 [386]
4341
+ # SHL r/m32,CL ; o32 D3 /4 [386]
4342
+ # SHL r/m32,imm8 ; o32 C1 /4 ib [386]
4343
+
4344
+ # SHR r/m8,1 ; D0 /5 [8086]
4345
+ # SHR r/m8,CL ; D2 /5 [8086]
4346
+ # SHR r/m8,imm8 ; C0 /5 ib [286]
4347
+ # SHR r/m16,1 ; o16 D1 /5 [8086]
4348
+ # SHR r/m16,CL ; o16 D3 /5 [8086]
4349
+ # SHR r/m16,imm8 ; o16 C1 /5 ib [286]
4350
+ # SHR r/m32,1 ; o32 D1 /5 [386]
4351
+ # SHR r/m32,CL ; o32 D3 /5 [386]
4352
+ # SHR r/m32,imm8 ; o32 C1 /5 ib [386]
4353
+
4354
+ # |SHL| and |SHR| perform a logical shift operation on the given
4355
+ # source/destination (first) operand. The vacated bits are filled with zero.
4356
+
4357
+ # A synonym for |SHL| is |SAL| (see section A.146 <#section-A.146>). NASM
4358
+ # will assemble either one to the same code, but NDISASM will always
4359
+ # disassemble that code as |SHL|.
4360
+
4361
+ # The number of bits to shift by is given by the second operand. Only the
4362
+ # bottom 3, 4 or 5 bits (depending on the source operand size) of the
4363
+ # shift count are considered by processors above the 8086.
4364
+
4365
+ # You can force the longer (286 and upwards, beginning with a |C1| byte)
4366
+ # form of |SHL foo,1| by using a |BYTE| prefix: |SHL foo,BYTE 1|.
4367
+ # Similarly with |SHR|.
4368
+
4369
+
4370
+ # A.153 |SHLD|, |SHRD|: Bitwise Double-Precision Shifts
4371
+
4372
+ # SHLD r/m16,reg16,imm8 ; o16 0F A4 /r ib [386]
4373
+ # SHLD r/m32,reg32,imm8 ; o32 0F A4 /r ib [386]
4374
+ # SHLD r/m16,reg16,CL ; o16 0F A5 /r [386]
4375
+ # SHLD r/m32,reg32,CL ; o32 0F A5 /r [386]
4376
+
4377
+ # SHRD r/m16,reg16,imm8 ; o16 0F AC /r ib [386]
4378
+ # SHRD r/m32,reg32,imm8 ; o32 0F AC /r ib [386]
4379
+ # SHRD r/m16,reg16,CL ; o16 0F AD /r [386]
4380
+ # SHRD r/m32,reg32,CL ; o32 0F AD /r [386]
4381
+
4382
+ # |SHLD| performs a double-precision left shift. It notionally places its
4383
+ # second operand to the right of its first, then shifts the entire bit
4384
+ # string thus generated to the left by a number of bits specified in the
4385
+ # third operand. It then updates only the /first/ operand according to the
4386
+ # result of this. The second operand is not modified.
4387
+
4388
+ # |SHRD| performs the corresponding right shift: it notionally places the
4389
+ # second operand to the /left/ of the first, shifts the whole bit string
4390
+ # right, and updates only the first operand.
4391
+
4392
+ # For example, if |EAX| holds |0x01234567| and |EBX| holds |0x89ABCDEF|,
4393
+ # then the instruction |SHLD EAX,EBX,4| would update |EAX| to hold
4394
+ # |0x12345678|. Under the same conditions, |SHRD EAX,EBX,4| would update
4395
+ # |EAX| to hold |0xF0123456|.
4396
+
4397
+ # The number of bits to shift by is given by the third operand. Only the
4398
+ # bottom 5 bits of the shift count are considered.
4399
+
4400
+
4401
+ # A.154 |SMI|: System Management Interrupt
4402
+
4403
+ # SMI ; F1 [386,UNDOC]
4404
+
4405
+ # This is an opcode apparently supported by some AMD processors (which is
4406
+ # why it can generate the same opcode as |INT1|), and places the machine
4407
+ # into system-management mode, a special debugging mode.
4408
+
4409
+
4410
+ # A.155 |SMSW|: Store Machine Status Word
4411
+
4412
+ # SMSW r/m16 ; 0F 01 /4 [286,PRIV]
4413
+
4414
+ # |SMSW| stores the bottom half of the |CR0| control register (or the
4415
+ # Machine Status Word, on 286 processors) into the destination operand.
4416
+ # See also |LMSW| (section A.96 <#section-A.96>).
4417
+
4418
+
4419
+ # A.156 |STC|, |STD|, |STI|: Set Flags
4420
+
4421
+ # STC ; F9 [8086]
4422
+ # STD ; FD [8086]
4423
+ # STI ; FB [8086]
4424
+
4425
+ # These instructions set various flags. |STC| sets the carry flag; |STD|
4426
+ # sets the direction flag; and |STI| sets the interrupt flag (thus
4427
+ # enabling interrupts).
4428
+
4429
+ # To clear the carry, direction, or interrupt flags, use the |CLC|, |CLD|
4430
+ # and |CLI| instructions (section A.15 <#section-A.15>). To invert the
4431
+ # carry flag, use |CMC| (section A.16 <#section-A.16>).
4432
+
4433
+
4434
+ # A.157 |STOSB|, |STOSW|, |STOSD|: Store Byte to String
4435
+
4436
+ # STOSB ; AA [8086]
4437
+ # STOSW ; o16 AB [8086]
4438
+ # STOSD ; o32 AB [386]
4439
+
4440
+ # |STOSB| stores the byte in |AL| at |[ES:DI]| or |[ES:EDI]|. It does not
4441
+ # affect the flags. It then increments or decrements (depending on the
4442
+ # direction flag: increments if the flag is clear, decrements if it is
4443
+ # set) |DI| (or |EDI|).
4444
+
4445
+ # The register used is |DI| if the address size is 16 bits, and |EDI| if
4446
+ # it is 32 bits. If you need to use an address size not equal to the
4447
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
4448
+
4449
+ # Segment override prefixes have no effect for this instruction: the use
4450
+ # of |ES| for the store to |[DI]| or |[EDI]| cannot be overridden.
4451
+
4452
+ # |STOSW| and |STOSD| work in the same way, but they store the word in
4453
+ # |AX| or the doubleword in |EAX| instead of the byte in |AL|, and
4454
+ # increment or decrement the addressing registers by 2 or 4 instead of 1.
4455
+
4456
+ # The |REP| prefix may be used to repeat the instruction |CX| (or |ECX| -
4457
+ # again, the address size chooses which) times.
4458
+
4459
+
4460
+ # A.158 |STR|: Store Task Register
4461
+
4462
+ # STR r/m16 ; 0F 00 /1 [286,PRIV]
4463
+
4464
+ # |STR| stores the segment selector corresponding to the contents of the
4465
+ # Task Register into its operand.
4466
+
4467
+
4468
+ # A.159 |SUB|: Subtract Integers
4469
+
4470
+ # SUB r/m8,reg8 ; 28 /r [8086]
4471
+ # SUB r/m16,reg16 ; o16 29 /r [8086]
4472
+ # SUB r/m32,reg32 ; o32 29 /r [386]
4473
+
4474
+ # SUB reg8,r/m8 ; 2A /r [8086]
4475
+ # SUB reg16,r/m16 ; o16 2B /r [8086]
4476
+ # SUB reg32,r/m32 ; o32 2B /r [386]
4477
+
4478
+ # SUB r/m8,imm8 ; 80 /5 ib [8086]
4479
+ # SUB r/m16,imm16 ; o16 81 /5 iw [8086]
4480
+ # SUB r/m32,imm32 ; o32 81 /5 id [386]
4481
+
4482
+ # SUB r/m16,imm8 ; o16 83 /5 ib [8086]
4483
+ # SUB r/m32,imm8 ; o32 83 /5 ib [386]
4484
+
4485
+ # SUB AL,imm8 ; 2C ib [8086]
4486
+ # SUB AX,imm16 ; o16 2D iw [8086]
4487
+ # SUB EAX,imm32 ; o32 2D id [386]
4488
+
4489
+ # |SUB| performs integer subtraction: it subtracts its second operand from
4490
+ # its first, and leaves the result in its destination (first) operand. The
4491
+ # flags are set according to the result of the operation: in particular,
4492
+ # the carry flag is affected and can be used by a subsequent |SBB|
4493
+ # instruction (section A.148 <#section-A.148>).
4494
+
4495
+ # In the forms with an 8-bit immediate second operand and a longer first
4496
+ # operand, the second operand is considered to be signed, and is
4497
+ # sign-extended to the length of the first operand. In these cases, the
4498
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
4499
+ # instruction.
4500
+
4501
+
4502
+ # A.160 |TEST|: Test Bits (notional bitwise AND)
4503
+
4504
+ # TEST r/m8,reg8 ; 84 /r [8086]
4505
+ # TEST r/m16,reg16 ; o16 85 /r [8086]
4506
+ # TEST r/m32,reg32 ; o32 85 /r [386]
4507
+
4508
+ # TEST r/m8,imm8 ; F6 /0 ib [8086]
4509
+ # TEST r/m16,imm16 ; o16 F7 /0 iw [8086]
4510
+ # TEST r/m32,imm32 ; o32 F7 /0 id [386]
4511
+
4512
+ # TEST AL,imm8 ; A8 ib [8086]
4513
+ # TEST AX,imm16 ; o16 A9 iw [8086]
4514
+ # TEST EAX,imm32 ; o32 A9 id [386]
4515
+
4516
+ # |TEST| performs a `mental' bitwise AND of its two operands, and affects
4517
+ # the flags as if the operation had taken place, but does not store the
4518
+ # result of the operation anywhere.
4519
+
4520
+
4521
+ # A.161 |UMOV|: User Move Data
4522
+
4523
+ # UMOV r/m8,reg8 ; 0F 10 /r [386,UNDOC]
4524
+ # UMOV r/m16,reg16 ; o16 0F 11 /r [386,UNDOC]
4525
+ # UMOV r/m32,reg32 ; o32 0F 11 /r [386,UNDOC]
4526
+
4527
+ # UMOV reg8,r/m8 ; 0F 12 /r [386,UNDOC]
4528
+ # UMOV reg16,r/m16 ; o16 0F 13 /r [386,UNDOC]
4529
+ # UMOV reg32,r/m32 ; o32 0F 13 /r [386,UNDOC]
4530
+
4531
+ # This undocumented instruction is used by in-circuit emulators to access
4532
+ # user memory (as opposed to host memory). It is used just like an
4533
+ # ordinary memory/register or register/register |MOV| instruction, but
4534
+ # accesses user space.
4535
+
4536
+
4537
+ # A.162 |VERR|, |VERW|: Verify Segment Readability/Writability
4538
+
4539
+ # VERR r/m16 ; 0F 00 /4 [286,PRIV]
4540
+
4541
+ # VERW r/m16 ; 0F 00 /5 [286,PRIV]
4542
+
4543
+ # |VERR| sets the zero flag if the segment specified by the selector in
4544
+ # its operand can be read from at the current privilege level. |VERW| sets
4545
+ # the zero flag if the segment can be written.
4546
+
4547
+
4548
+ # A.163 |WAIT|: Wait for Floating-Point Processor
4549
+
4550
+ # WAIT ; 9B [8086]
4551
+
4552
+ # |WAIT|, on 8086 systems with a separate 8087 FPU, waits for the FPU to
4553
+ # have finished any operation it is engaged in before continuing main
4554
+ # processor operations, so that (for example) an FPU store to main memory
4555
+ # can be guaranteed to have completed before the CPU tries to read the
4556
+ # result back out.
4557
+
4558
+ # On higher processors, |WAIT| is unnecessary for this purpose, and it has
4559
+ # the alternative purpose of ensuring that any pending unmasked FPU
4560
+ # exceptions have happened before execution continues.
4561
+
4562
+
4563
+ # A.164 |WBINVD|: Write Back and Invalidate Cache
4564
+
4565
+ # WBINVD ; 0F 09 [486]
4566
+
4567
+ # |WBINVD| invalidates and empties the processor's internal caches, and
4568
+ # causes the processor to instruct external caches to do the same. It
4569
+ # writes the contents of the caches back to memory first, so no data is
4570
+ # lost. To flush the caches quickly without bothering to write the data
4571
+ # back first, use |INVD| (section A.84 <#section-A.84>).
4572
+
4573
+
4574
+ # A.165 |WRMSR|: Write Model-Specific Registers
4575
+
4576
+ # WRMSR ; 0F 30 [PENT]
4577
+
4578
+ # |WRMSR| writes the value in |EDX:EAX| to the processor Model-Specific
4579
+ # Register (MSR) whose index is stored in |ECX|. See also |RDMSR| (section
4580
+ # A.139 <#section-A.139>).
4581
+
4582
+
4583
+ # A.166 |XADD|: Exchange and Add
4584
+
4585
+ # XADD r/m8,reg8 ; 0F C0 /r [486]
4586
+ # XADD r/m16,reg16 ; o16 0F C1 /r [486]
4587
+ # XADD r/m32,reg32 ; o32 0F C1 /r [486]
4588
+
4589
+ # |XADD| exchanges the values in its two operands, and then adds them
4590
+ # together and writes the result into the destination (first) operand.
4591
+ # This instruction can be used with a |LOCK| prefix for multi-processor
4592
+ # synchronisation purposes.
4593
+
4594
+
4595
+ # A.167 |XBTS|: Extract Bit String
4596
+
4597
+ # XBTS reg16,r/m16 ; o16 0F A6 /r [386,UNDOC]
4598
+ # XBTS reg32,r/m32 ; o32 0F A6 /r [386,UNDOC]
4599
+
4600
+ # No clear documentation seems to be available for this instruction: the
4601
+ # best I've been able to find reads `Takes a string of bits from the first
4602
+ # operand and puts them in the second operand'. It is present only in
4603
+ # early 386 processors, and conflicts with the opcodes for |CMPXCHG486|.
4604
+ # NASM supports it only for completeness. Its counterpart is |IBTS| (see
4605
+ # section A.75 <#section-A.75>).
4606
+
4607
+
4608
+ # A.168 |XCHG|: Exchange
4609
+
4610
+ # XCHG reg8,r/m8 ; 86 /r [8086]
4611
+ # XCHG reg16,r/m16 ; o16 87 /r [8086]
4612
+ # XCHG reg32,r/m32 ; o32 87 /r [386]
4613
+
4614
+ # XCHG r/m8,reg8 ; 86 /r [8086]
4615
+ # XCHG r/m16,reg16 ; o16 87 /r [8086]
4616
+ # XCHG r/m32,reg32 ; o32 87 /r [386]
4617
+
4618
+ # XCHG AX,reg16 ; o16 90+r [8086]
4619
+ # XCHG EAX,reg32 ; o32 90+r [386]
4620
+ # XCHG reg16,AX ; o16 90+r [8086]
4621
+ # XCHG reg32,EAX ; o32 90+r [386]
4622
+
4623
+ # |XCHG| exchanges the values in its two operands. It can be used with a
4624
+ # |LOCK| prefix for purposes of multi-processor synchronisation.
4625
+
4626
+ # |XCHG AX,AX| or |XCHG EAX,EAX| (depending on the |BITS| setting)
4627
+ # generates the opcode |90h|, and so is a synonym for |NOP| (section A.109
4628
+ # <#section-A.109>).
4629
+
4630
+
4631
+ # A.169 |XLATB|: Translate Byte in Lookup Table
4632
+
4633
+ # XLATB ; D7 [8086]
4634
+
4635
+ # |XLATB| adds the value in |AL|, treated as an unsigned byte, to |BX| or
4636
+ # |EBX|, and loads the byte from the resulting address (in the segment
4637
+ # specified by |DS|) back into |AL|.
4638
+
4639
+ # The base register used is |BX| if the address size is 16 bits, and |EBX|
4640
+ # if it is 32 bits. If you need to use an address size not equal to the
4641
+ # current |BITS| setting, you can use an explicit |a16| or |a32| prefix.
4642
+
4643
+ # The segment register used to load from |[BX+AL]| or |[EBX+AL]| can be
4644
+ # overridden by using a segment register name as a prefix (for example,
4645
+ # |es xlatb|).
4646
+
4647
+
4648
+ # A.170 |XOR|: Bitwise Exclusive OR
4649
+
4650
+ # XOR r/m8,reg8 ; 30 /r [8086]
4651
+ # XOR r/m16,reg16 ; o16 31 /r [8086]
4652
+ # XOR r/m32,reg32 ; o32 31 /r [386]
4653
+
4654
+ # XOR reg8,r/m8 ; 32 /r [8086]
4655
+ # XOR reg16,r/m16 ; o16 33 /r [8086]
4656
+ # XOR reg32,r/m32 ; o32 33 /r [386]
4657
+
4658
+ # XOR r/m8,imm8 ; 80 /6 ib [8086]
4659
+ # XOR r/m16,imm16 ; o16 81 /6 iw [8086]
4660
+ # XOR r/m32,imm32 ; o32 81 /6 id [386]
4661
+
4662
+ # XOR r/m16,imm8 ; o16 83 /6 ib [8086]
4663
+ # XOR r/m32,imm8 ; o32 83 /6 ib [386]
4664
+
4665
+ # XOR AL,imm8 ; 34 ib [8086]
4666
+ # XOR AX,imm16 ; o16 35 iw [8086]
4667
+ # XOR EAX,imm32 ; o32 35 id [386]
4668
+
4669
+ # |XOR| performs a bitwise XOR operation between its two operands (i.e.
4670
+ # each bit of the result is 1 if and only if exactly one of the
4671
+ # corresponding bits of the two inputs was 1), and stores the result in
4672
+ # the destination (first) operand.
4673
+
4674
+ # In the forms with an 8-bit immediate second operand and a longer first
4675
+ # operand, the second operand is considered to be signed, and is
4676
+ # sign-extended to the length of the first operand. In these cases, the
4677
+ # |BYTE| qualifier is necessary to force NASM to generate this form of the
4678
+ # instruction.
4679
+
4680
+ # The MMX instruction |PXOR| (see section A.137 <#section-A.137>) performs
4681
+ # the same operation on the 64-bit MMX registers.
4682
+
4683
+ # Previous Chapter <nasmdo10.html> | Contents <nasmdoc0.html> | Index
4684
+ # <nasmdoci.html>