FindAFactor 3.9.0__tar.gz → 4.1.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- findafactor-4.1.0/FindAFactor/__init__.py +1 -0
- {findafactor-3.9.0 → findafactor-4.1.0}/FindAFactor/_find_a_factor.cpp +82 -149
- {findafactor-3.9.0 → findafactor-4.1.0}/FindAFactor/find_a_factor.py +13 -7
- {findafactor-3.9.0 → findafactor-4.1.0}/FindAFactor.egg-info/PKG-INFO +1 -1
- {findafactor-3.9.0 → findafactor-4.1.0}/FindAFactor.egg-info/SOURCES.txt +0 -1
- {findafactor-3.9.0 → findafactor-4.1.0}/PKG-INFO +1 -1
- {findafactor-3.9.0 → findafactor-4.1.0}/README.md +8 -10
- {findafactor-3.9.0 → findafactor-4.1.0}/pyproject.toml +3 -7
- {findafactor-3.9.0 → findafactor-4.1.0}/setup.py +1 -1
- findafactor-3.9.0/FindAFactor/__init__.py +0 -1
- findafactor-3.9.0/FindAFactor/oclengine.cpp +0 -386
- {findafactor-3.9.0 → findafactor-4.1.0}/FindAFactor/dispatchqueue.cpp +0 -0
- {findafactor-3.9.0 → findafactor-4.1.0}/FindAFactor.egg-info/dependency_links.txt +0 -0
- {findafactor-3.9.0 → findafactor-4.1.0}/FindAFactor.egg-info/not-zip-safe +0 -0
- {findafactor-3.9.0 → findafactor-4.1.0}/FindAFactor.egg-info/top_level.txt +0 -0
- {findafactor-3.9.0 → findafactor-4.1.0}/LICENSE +0 -0
- {findafactor-3.9.0 → findafactor-4.1.0}/setup.cfg +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
from .find_a_factor import find_a_factor, FactoringMethod
|
@@ -744,7 +744,6 @@ inline BigInteger modExp(BigInteger base, BigInteger exp, const BigInteger &mod)
|
|
744
744
|
|
745
745
|
struct Factorizer {
|
746
746
|
std::mutex batchMutex;
|
747
|
-
std::mutex smoothNumberMapMutex;
|
748
747
|
std::default_random_engine rng;
|
749
748
|
std::mt19937_64 gen;
|
750
749
|
std::uniform_int_distribution<size_t> dis;
|
@@ -761,9 +760,10 @@ struct Factorizer {
|
|
761
760
|
size_t rowOffset;
|
762
761
|
bool isIncomplete;
|
763
762
|
std::vector<size_t> primes;
|
763
|
+
std::vector<BigInteger> bigPrimes;
|
764
|
+
std::vector<BigInteger> sqrPrimes;
|
765
|
+
std::vector<boost::dynamic_bitset<size_t>> primeFactors;
|
764
766
|
ForwardFn forwardFn;
|
765
|
-
std::vector<BigInteger> smoothNumberKeys;
|
766
|
-
std::vector<boost::dynamic_bitset<size_t>> smoothNumberValues;
|
767
767
|
|
768
768
|
Factorizer(const BigInteger &tfsqr, const BigInteger &tf, const BigInteger &tfsqrt, const BigInteger &range, size_t nodeCount, size_t nodeId, size_t w, size_t spl,
|
769
769
|
const std::vector<size_t> &p, ForwardFn fn)
|
@@ -773,9 +773,10 @@ struct Factorizer {
|
|
773
773
|
for (size_t i = 0U; i < primes.size(); ++i) {
|
774
774
|
const size_t& p = primes[i];
|
775
775
|
wheelRadius *= p;
|
776
|
-
|
777
|
-
|
778
|
-
|
776
|
+
bigPrimes.push_back(p);
|
777
|
+
sqrPrimes.push_back(p * p);
|
778
|
+
primeFactors.emplace_back(primes.size(), 0);
|
779
|
+
primeFactors.back()[i] = true;
|
779
780
|
}
|
780
781
|
}
|
781
782
|
|
@@ -809,10 +810,15 @@ struct Factorizer {
|
|
809
810
|
return 1U;
|
810
811
|
}
|
811
812
|
|
812
|
-
BigInteger smoothCongruences(std::vector<boost::dynamic_bitset<size_t>> *inc_seqs
|
813
|
+
BigInteger smoothCongruences(std::vector<boost::dynamic_bitset<size_t>> *inc_seqs) {
|
813
814
|
// Up to wheel factorization, try all batches up to the square root of toFactor.
|
814
815
|
// Since the largest prime factors of these numbers is relatively small,
|
815
816
|
// use the "exhaust" of brute force to produce smooth numbers for Quadratic Sieve.
|
817
|
+
// Different collections per thread;
|
818
|
+
std::map<BigInteger, boost::dynamic_bitset<size_t>> smoothPartsMap;
|
819
|
+
std::vector<BigInteger> smoothParts;
|
820
|
+
smoothParts.reserve(smoothPartsLimit);
|
821
|
+
BigInteger numberCount = 0;
|
816
822
|
for (BigInteger batchNum = getNextAltBatch(); isIncomplete; batchNum = getNextAltBatch()) {
|
817
823
|
const BigInteger batchStart = batchNum * wheelEntryCount;
|
818
824
|
const BigInteger batchEnd = batchStart + wheelEntryCount;
|
@@ -825,16 +831,22 @@ struct Factorizer {
|
|
825
831
|
return n;
|
826
832
|
}
|
827
833
|
// Use the "exhaust" to produce smoother numbers.
|
828
|
-
|
834
|
+
const boost::dynamic_bitset<size_t> fv = factorizationVector(n);
|
835
|
+
if (fv.size()) {
|
836
|
+
smoothPartsMap[n] = fv;
|
837
|
+
smoothParts.push_back(n);
|
838
|
+
}
|
829
839
|
// Skip increments on the "wheels" (or "gears").
|
830
840
|
p += GetWheelIncrement(inc_seqs);
|
831
841
|
}
|
832
842
|
|
833
843
|
// Batch this work, to reduce contention.
|
834
|
-
if (
|
835
|
-
makeSmoothNumbers(
|
836
|
-
|
837
|
-
|
844
|
+
if (smoothParts.size() >= smoothPartsLimit) {
|
845
|
+
const BigInteger n = makeSmoothNumbers(smoothParts, smoothPartsMap);
|
846
|
+
if (!(toFactor % n) && (n != 1U) && (n != toFactor)) {
|
847
|
+
isIncomplete = false;
|
848
|
+
return n;
|
849
|
+
}
|
838
850
|
}
|
839
851
|
}
|
840
852
|
|
@@ -873,28 +885,19 @@ struct Factorizer {
|
|
873
885
|
return vec;
|
874
886
|
}
|
875
887
|
|
876
|
-
|
877
|
-
// Factorize all "smooth parts."
|
878
|
-
std::vector<BigInteger> smoothParts;
|
879
|
-
std::map<BigInteger, boost::dynamic_bitset<size_t>> smoothPartsMap;
|
880
|
-
for (const BigInteger &n : (*semiSmoothParts)) {
|
881
|
-
const boost::dynamic_bitset<size_t> fv = factorizationVector(n);
|
882
|
-
if (fv.size()) {
|
883
|
-
smoothPartsMap[n] = fv;
|
884
|
-
smoothParts.push_back(n);
|
885
|
-
}
|
886
|
-
}
|
887
|
-
// We can clear the thread's buffer vector.
|
888
|
-
semiSmoothParts->clear();
|
889
|
-
|
888
|
+
BigInteger makeSmoothNumbers(std::vector<BigInteger> &smoothParts, std::map<BigInteger, boost::dynamic_bitset<size_t>> &smoothPartsMap) {
|
890
889
|
// This is the only nondeterminism in the algorithm.
|
891
890
|
std::shuffle(smoothParts.begin(), smoothParts.end(), rng);
|
892
|
-
|
893
|
-
const BigInteger limit = isGaussElim ? toFactor : toFactorSqrt;
|
894
|
-
|
895
891
|
// Now that smooth parts have been shuffled, just multiply down the list until they are larger than square root of toFactor.
|
896
892
|
BigInteger smoothNumber = 1U;
|
897
893
|
boost::dynamic_bitset<size_t> fv(primes.size(), 0);
|
894
|
+
std::vector<BigInteger> smoothNumberKeys(bigPrimes);
|
895
|
+
std::vector<boost::dynamic_bitset<size_t>> smoothNumberValues;
|
896
|
+
smoothNumberValues.reserve(primeFactors.size());
|
897
|
+
for (const auto& f : primeFactors) {
|
898
|
+
smoothNumberValues.emplace_back(f);
|
899
|
+
}
|
900
|
+
smoothNumberKeys.reserve(smoothParts.size());
|
898
901
|
for (size_t spi = 0U; spi < smoothParts.size(); ++spi) {
|
899
902
|
const BigInteger &sp = smoothParts[spi];
|
900
903
|
// This multiplies together the factorizations of the smooth parts
|
@@ -902,18 +905,19 @@ struct Factorizer {
|
|
902
905
|
fv ^= smoothPartsMap[sp];
|
903
906
|
smoothNumber *= sp;
|
904
907
|
// Check if the number is big enough
|
905
|
-
if (smoothNumber <=
|
908
|
+
if (smoothNumber <= toFactor) {
|
906
909
|
continue;
|
907
910
|
}
|
908
|
-
|
909
|
-
|
910
|
-
smoothNumberValues.emplace_back(fv);
|
911
|
-
smoothNumberKeys.push_back(smoothNumber);
|
912
|
-
}
|
911
|
+
smoothNumberValues.emplace_back(fv);
|
912
|
+
smoothNumberKeys.push_back(smoothNumber);
|
913
913
|
// Reset "smoothNumber" and its factorization vector.
|
914
914
|
smoothNumber = 1U;
|
915
915
|
fv = boost::dynamic_bitset<size_t>(primes.size(), 0);
|
916
916
|
}
|
917
|
+
smoothParts.clear();
|
918
|
+
smoothPartsMap.clear();
|
919
|
+
|
920
|
+
return findFactor(smoothNumberKeys, smoothNumberValues);
|
917
921
|
}
|
918
922
|
|
919
923
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
@@ -921,7 +925,7 @@ struct Factorizer {
|
|
921
925
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
922
926
|
|
923
927
|
// Perform Gaussian elimination on a binary matrix
|
924
|
-
void gaussianElimination() {
|
928
|
+
void gaussianElimination(std::vector<BigInteger> &smoothNumberKeys, std::vector<boost::dynamic_bitset<size_t>> &smoothNumberValues) {
|
925
929
|
const unsigned cpuCount = CpuCount;
|
926
930
|
auto mColIt = smoothNumberValues.begin();
|
927
931
|
auto nColIt = smoothNumberKeys.begin();
|
@@ -985,108 +989,48 @@ struct Factorizer {
|
|
985
989
|
}
|
986
990
|
|
987
991
|
BigInteger checkPerfectSquare(BigInteger perfectSquare) {
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
if (x == y) {
|
999
|
-
return 1U;
|
1000
|
-
}
|
1001
|
-
|
1002
|
-
// Try x - y as well
|
1003
|
-
factor = gcd(toFactor, x - y);
|
1004
|
-
if ((factor != 1U) && (factor != toFactor)) {
|
1005
|
-
return factor;
|
1006
|
-
}
|
1007
|
-
|
1008
|
-
return 1U;
|
1009
|
-
}
|
1010
|
-
|
1011
|
-
// Find duplicate rows
|
1012
|
-
BigInteger findDuplicateRows(const BigInteger &target) {
|
1013
|
-
// Check for linear dependencies and find a congruence of squares
|
1014
|
-
std::mutex rowMutex;
|
1015
|
-
BigInteger result = 1U;
|
1016
|
-
std::set<size_t> toStrike;
|
1017
|
-
auto iIt = smoothNumberValues.begin();
|
1018
|
-
const size_t rowCount = smoothNumberValues.size();
|
1019
|
-
const size_t rowCountMin1 = rowCount - 1U;
|
1020
|
-
for (size_t i = primes.size(); (i < rowCountMin1) && (result == 1U); ++i) {
|
1021
|
-
dispatch.dispatch([this, &target, i, iIt, &rowCount, &result, &rowMutex, &toStrike]() -> bool {
|
1022
|
-
boost::dynamic_bitset<size_t> &iRow = *iIt;
|
1023
|
-
const BigInteger& iInt = this->smoothNumberKeys[i];
|
1024
|
-
|
1025
|
-
const size_t startJ = std::max(this->rowOffset, i + 1U);
|
1026
|
-
auto jIt = this->smoothNumberValues.begin();
|
1027
|
-
std::advance(jIt, (startJ - 1U));
|
1028
|
-
for (size_t j = startJ; j < rowCount; ++j) {
|
1029
|
-
++jIt;
|
1030
|
-
|
1031
|
-
boost::dynamic_bitset<size_t> &jRow = *jIt;
|
1032
|
-
if (iRow != jRow) {
|
1033
|
-
continue;
|
1034
|
-
}
|
1035
|
-
|
1036
|
-
const BigInteger& jInt = this->smoothNumberKeys[j];
|
1037
|
-
if (iInt < jInt) {
|
1038
|
-
std::lock_guard<std::mutex> lock(rowMutex);
|
1039
|
-
toStrike.insert(j);
|
1040
|
-
} else {
|
1041
|
-
std::lock_guard<std::mutex> lock(rowMutex);
|
1042
|
-
toStrike.insert(i);
|
1043
|
-
}
|
1044
|
-
|
1045
|
-
const BigInteger factor = checkPerfectSquare(this->smoothNumberKeys[i]);
|
1046
|
-
if ((factor != 1U) && (factor != target)) {
|
1047
|
-
std::lock_guard<std::mutex> lock(rowMutex);
|
1048
|
-
result = factor;
|
1049
|
-
|
1050
|
-
return true;
|
1051
|
-
}
|
1052
|
-
}
|
992
|
+
while (perfectSquare < toFactorSqr) {
|
993
|
+
// Compute x and y
|
994
|
+
const BigInteger x = perfectSquare % toFactor;
|
995
|
+
const BigInteger y = modExp(x, toFactor >> 1U, toFactor);
|
996
|
+
|
997
|
+
// Check congruence of squares
|
998
|
+
BigInteger factor = gcd(toFactor, x + y);
|
999
|
+
if ((factor != 1U) && (factor != toFactor)) {
|
1000
|
+
return factor;
|
1001
|
+
}
|
1053
1002
|
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1057
|
-
}
|
1058
|
-
dispatch.finish();
|
1003
|
+
if (x == y) {
|
1004
|
+
continue;
|
1005
|
+
}
|
1059
1006
|
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1007
|
+
// Try x - y as well
|
1008
|
+
factor = gcd(toFactor, x - y);
|
1009
|
+
if ((factor != 1U) && (factor != toFactor)) {
|
1010
|
+
return factor;
|
1011
|
+
}
|
1063
1012
|
|
1064
|
-
|
1065
|
-
for (const size_t& i : toStrike) {
|
1066
|
-
smoothNumberKeys.erase(smoothNumberKeys.begin() + i);
|
1067
|
-
smoothNumberValues.erase(smoothNumberValues.begin() + i);
|
1013
|
+
perfectSquare *= sqrPrimes[dis(gen)];
|
1068
1014
|
}
|
1069
1015
|
|
1070
|
-
|
1071
|
-
|
1072
|
-
return 1U; // No factor found
|
1016
|
+
return 1U;
|
1073
1017
|
}
|
1074
1018
|
|
1075
1019
|
// Use Gaussian elimination
|
1076
|
-
BigInteger findFactor(
|
1020
|
+
BigInteger findFactor(std::vector<BigInteger> &smoothNumberKeys, std::vector<boost::dynamic_bitset<size_t>> &smoothNumberValues) {
|
1077
1021
|
// Gaussian elimination multiplies these numbers
|
1078
1022
|
// with small primes, to produce squares
|
1079
|
-
gaussianElimination();
|
1023
|
+
gaussianElimination(smoothNumberKeys, smoothNumberValues);
|
1080
1024
|
|
1081
1025
|
// Check for linear dependencies and find a congruence of squares
|
1082
1026
|
std::mutex rowMutex;
|
1083
1027
|
BigInteger result = 1U;
|
1084
1028
|
const size_t rowCount = smoothNumberKeys.size();
|
1085
1029
|
for (size_t i = primes.size(); (i < rowCount) && (result == 1U); ++i) {
|
1086
|
-
dispatch.dispatch([this,
|
1087
|
-
const BigInteger factor = checkPerfectSquare(
|
1030
|
+
dispatch.dispatch([this, i, &result, &rowMutex, &smoothNumberKeys]() -> bool {
|
1031
|
+
const BigInteger factor = checkPerfectSquare(smoothNumberKeys[i]);
|
1088
1032
|
|
1089
|
-
if ((factor != 1U) && (factor !=
|
1033
|
+
if ((factor != 1U) && (factor != this->toFactor)) {
|
1090
1034
|
std::lock_guard<std::mutex> lock(rowMutex);
|
1091
1035
|
result = factor;
|
1092
1036
|
|
@@ -1114,9 +1058,13 @@ struct Factorizer {
|
|
1114
1058
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
1115
1059
|
};
|
1116
1060
|
|
1117
|
-
std::string find_a_factor(
|
1118
|
-
size_t
|
1061
|
+
std::string find_a_factor(std::string toFactorStr, size_t method, size_t nodeCount, size_t nodeId, size_t trialDivisionLevel, size_t gearFactorizationLevel,
|
1062
|
+
size_t wheelFactorizationLevel, double smoothnessBoundMultiplier, double batchSizeMultiplier) {
|
1119
1063
|
// (At least) level 11 wheel factorization is baked into basic functions.
|
1064
|
+
if (method > 1U) {
|
1065
|
+
std::cout << "FACTOR_FINDER mode not yet implemented. Defaulting to MIXED." << std::endl;
|
1066
|
+
}
|
1067
|
+
const bool isConOfSqr = (method > 0);
|
1120
1068
|
if (!wheelFactorizationLevel) {
|
1121
1069
|
wheelFactorizationLevel = 1U;
|
1122
1070
|
} else if (wheelFactorizationLevel > 13U) {
|
@@ -1256,7 +1204,7 @@ std::string find_a_factor(const std::string &toFactorStr, const bool &isConOfSqr
|
|
1256
1204
|
return boost::lexical_cast<std::string>(result);
|
1257
1205
|
}
|
1258
1206
|
|
1259
|
-
const auto
|
1207
|
+
const auto workerFn = [&inc_seqs, &wheelEntryCount, &batchSizeMultiplier, &worker] {
|
1260
1208
|
// inc_seq needs to be independent per thread.
|
1261
1209
|
std::vector<boost::dynamic_bitset<size_t>> inc_seqs_clone;
|
1262
1210
|
inc_seqs_clone.reserve(inc_seqs.size());
|
@@ -1264,38 +1212,23 @@ std::string find_a_factor(const std::string &toFactorStr, const bool &isConOfSqr
|
|
1264
1212
|
inc_seqs_clone.emplace_back(b);
|
1265
1213
|
}
|
1266
1214
|
|
1267
|
-
// Different collections per thread;
|
1268
|
-
std::vector<BigInteger> semiSmoothParts;
|
1269
|
-
semiSmoothParts.reserve((size_t)((wheelEntryCount << 1U) * batchSizeMultiplier));
|
1270
|
-
|
1271
1215
|
// While brute-forcing, use the "exhaust" to feed "smooth" number generation and check congruence of squares.
|
1272
|
-
return worker.smoothCongruences(&inc_seqs_clone
|
1216
|
+
return worker.smoothCongruences(&inc_seqs_clone);
|
1273
1217
|
};
|
1274
1218
|
|
1275
1219
|
std::vector<std::future<BigInteger>> futures;
|
1276
1220
|
futures.reserve(CpuCount);
|
1277
1221
|
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
}
|
1282
|
-
|
1283
|
-
for (unsigned cpu = 0U; cpu < futures.size(); ++cpu) {
|
1284
|
-
const BigInteger r = futures[cpu].get();
|
1285
|
-
if ((r > result) && (r != toFactor)) {
|
1286
|
-
result = r;
|
1287
|
-
}
|
1288
|
-
}
|
1222
|
+
for (unsigned cpu = 0U; cpu < CpuCount; ++cpu) {
|
1223
|
+
futures.push_back(std::async(std::launch::async, workerFn));
|
1224
|
+
}
|
1289
1225
|
|
1290
|
-
|
1291
|
-
|
1226
|
+
for (unsigned cpu = 0U; cpu < futures.size(); ++cpu) {
|
1227
|
+
const BigInteger r = futures[cpu].get();
|
1228
|
+
if ((r > result) && (r != toFactor)) {
|
1229
|
+
result = r;
|
1292
1230
|
}
|
1293
|
-
|
1294
|
-
futures.clear();
|
1295
|
-
|
1296
|
-
// This next section is for (Quadratic Sieve) Gaussian elimination.
|
1297
|
-
result = isGaussElim ? worker.findFactor(toFactor) : worker.findDuplicateRows(toFactor);
|
1298
|
-
} while ((result == 1U) || (result == toFactor));
|
1231
|
+
}
|
1299
1232
|
|
1300
1233
|
return boost::lexical_cast<std::string>(result);
|
1301
1234
|
}
|
@@ -1305,5 +1238,5 @@ using namespace Qimcifa;
|
|
1305
1238
|
|
1306
1239
|
PYBIND11_MODULE(_find_a_factor, m) {
|
1307
1240
|
m.doc() = "pybind11 plugin to find any factor of input";
|
1308
|
-
m.def("_find_a_factor", &find_a_factor, "Finds any nontrivial factor of input (or returns 1 if prime)");
|
1241
|
+
m.def("_find_a_factor", &find_a_factor, "Finds any nontrivial factor of input (or returns 1 or the number to factor if prime)");
|
1309
1242
|
}
|
@@ -1,19 +1,25 @@
|
|
1
1
|
import os
|
2
2
|
import _find_a_factor
|
3
|
+
from enum import IntEnum
|
4
|
+
|
5
|
+
|
6
|
+
class FactoringMethod(IntEnum):
|
7
|
+
PRIME_SOLVER = 0
|
8
|
+
MIXED = 1
|
9
|
+
FACTOR_FINDER = 2
|
10
|
+
|
3
11
|
|
4
12
|
def find_a_factor(n,
|
5
|
-
|
6
|
-
use_gaussian_elimination=True if os.environ.get('FINDAFACTOR_USE_GAUSSIAN_ELIMINATION') else False,
|
13
|
+
method=FactoringMethod(int(os.environ.get('FINDAFACTOR_METHOD'))) if os.environ.get('FINDAFACTOR_METHOD') else FactoringMethod.PRIME_SOLVER,
|
7
14
|
node_count=int(os.environ.get('FINDAFACTOR_NODE_COUNT')) if os.environ.get('FINDAFACTOR_NODE_COUNT') else 1,
|
8
15
|
node_id=int(os.environ.get('FINDAFACTOR_NODE_ID')) if os.environ.get('FINDAFACTOR_NODE_ID') else 0,
|
9
16
|
trial_division_level=int(os.environ.get('FINDAFACTOR_TRIAL_DIVISION_LEVEL')) if os.environ.get('FINDAFACTOR_TRIAL_DIVISION_LEVEL') else (1<<20),
|
10
17
|
gear_factorization_level=int(os.environ.get('FINDAFACTOR_GEAR_FACTORIZATION_LEVEL')) if os.environ.get('FINDAFACTOR_GEAR_FACTORIZATION_LEVEL') else 11,
|
11
|
-
wheel_factorization_level=int(os.environ.get('FINDAFACTOR_WHEEL_FACTORIZATION_LEVEL')) if os.environ.get('FINDAFACTOR_WHEEL_FACTORIZATION_LEVEL') else
|
12
|
-
smoothness_bound_multiplier=float(os.environ.get('FINDAFACTOR_SMOOTHNESS_BOUND_MULTIPLIER')) if os.environ.get('FINDAFACTOR_SMOOTHNESS_BOUND_MULTIPLIER') else
|
13
|
-
batch_size_multiplier=float(os.environ.get('FINDAFACTOR_BATCH_SIZE_MULTIPLIER')) if os.environ.get('FINDAFACTOR_BATCH_SIZE_MULTIPLIER') else
|
18
|
+
wheel_factorization_level=int(os.environ.get('FINDAFACTOR_WHEEL_FACTORIZATION_LEVEL')) if os.environ.get('FINDAFACTOR_WHEEL_FACTORIZATION_LEVEL') else 11,
|
19
|
+
smoothness_bound_multiplier=float(os.environ.get('FINDAFACTOR_SMOOTHNESS_BOUND_MULTIPLIER')) if os.environ.get('FINDAFACTOR_SMOOTHNESS_BOUND_MULTIPLIER') else 0.25,
|
20
|
+
batch_size_multiplier=float(os.environ.get('FINDAFACTOR_BATCH_SIZE_MULTIPLIER')) if os.environ.get('FINDAFACTOR_BATCH_SIZE_MULTIPLIER') else 8.0):
|
14
21
|
return int(_find_a_factor._find_a_factor(str(n),
|
15
|
-
|
16
|
-
use_gaussian_elimination,
|
22
|
+
int(method),
|
17
23
|
node_count, node_id,
|
18
24
|
trial_division_level,
|
19
25
|
gear_factorization_level,
|
@@ -21,37 +21,35 @@ Windows users might find Windows Subsystem Linux (WSL) to be the easier and pref
|
|
21
21
|
## Usage
|
22
22
|
|
23
23
|
```py
|
24
|
-
from FindAFactor import find_a_factor
|
24
|
+
from FindAFactor import find_a_factor, FactoringMethod
|
25
25
|
|
26
26
|
to_factor = 1000
|
27
27
|
|
28
28
|
factor = find_a_factor(
|
29
29
|
to_factor,
|
30
|
-
|
31
|
-
use_gaussian_elimination=False,
|
30
|
+
method=FactoringMethod.PRIME_SOLVER,
|
32
31
|
node_count=1, node_id=0,
|
33
32
|
trial_division_level=2**20,
|
34
33
|
gear_factorization_level=11,
|
35
34
|
wheel_factorization_level=11,
|
36
|
-
smoothness_bound_multiplier=
|
37
|
-
batch_size_multiplier=
|
35
|
+
smoothness_bound_multiplier=0.25,
|
36
|
+
batch_size_multiplier=8.0
|
38
37
|
)
|
39
38
|
```
|
40
39
|
|
41
40
|
The `find_a_factor()` function should return any nontrivial factor of `to_factor` (that is, any factor besides `1` or `to_factor`) if it exists. If a nontrivial factor does _not_ exist (i.e., the number to factor is prime), the function will return `1` or the original `to_factor`.
|
42
41
|
|
43
|
-
- `
|
44
|
-
- `use_gaussian_elimination` (default value: `False`): This option is only relevant if `use_congruence_of_squares=True`. In that case, if `use_gaussian_elimination` is `True`, then proper Gaussian elimination is used, with **O(N^3)** worst case complexity but using potentially much smaller "N" count of rows. If the option is `False`, rather than Gaussian elimination, the algorithm checks only for exact factorization parity duplicates in the "smooth" number rows, for **O(N^2)** worst case complexity, but using a potentially much larger "N" count of rows.
|
42
|
+
- `method` (default value: `PRIME_SOLVER`/`0`): `PRIME_SOLVER`/`0` will prove that a number is prime (by failing to find any factors with wheel and gear factorization). `FACTOR_FINDER`/`2` is optimized for the assumption that the number has at least two nontrivial factors. (`FACTOR_FINDER`/`2` is not yet implemented and currently falls back to `MIXED`/`1`, but implementing it is the next development goal.) `MIXED`/`1` will be able to demonstrate that a number is prime, if necessary, while imitating some of the optimized behavior of `FACTOR_FINDER`/`2`.
|
45
43
|
- `node_count` (default value: `1`): `FindAFactor` can perform factorization in a _distributed_ manner, across nodes, without network communication! When `node_count` is set higher than `1`, the search space for factors is segmented equally per node. If the number to factor is semiprime, and brute-force search is used instead of congruence of squares, for example, all nodes except the one that happens to contain the (unknown) prime factor less than the square root of `to_factor` will ultimately return `1`, while one node will find and return this factor. For best performance, every node involved in factorization should have roughly the same CPU throughput capacity.
|
46
44
|
- `node_id` (default value: `0`): This is the identifier of this node, when performing distributed factorization with `node_count` higher than `1`. `node_id` values start at `0` and go as high as `(node_count - 1)`.
|
47
45
|
- `trial_division_level` (default value: `2**20`): Trial division is carried out as a preliminary round for all primes up to this number. If you need more primes for your smoothness bound, increase this level.
|
48
46
|
- `gear_factorization_level` (default value: `11`): This is the value up to which "wheel (and gear) factorization" and trial division are used to check factors and optimize "brute force," in general. The default value of `11` includes all prime factors of `11` and below and works well in general, though significantly higher might be preferred in certain cases.
|
49
47
|
- `wheel_factorization_level` (default value: `11`): "Wheel" vs. "gear" factorization balances two types of factorization wheel ("wheel" vs. "gear" design) that often work best when the "wheel" is only a few prime number levels lower than gear factorization. Optimized implementation for wheels is only available up to `13`. The primes above "wheel" level, up to "gear" level, are the primes used specifically for "gear" factorization.
|
50
|
-
- `smoothness_bound_multiplier` (default value: `
|
51
|
-
- `batch_size_multiplier` (default value: `
|
48
|
+
- `smoothness_bound_multiplier` (default value: `0.25`): Starting with the first prime number after wheel factorization, the congruence of squares approach (with Quadratic Sieve) has a "smoothness bound" unit with as many distinct prime numbers as bits in the number to factor (for argument of `1.0` multiplier). To increase or decrease this number, consider it multiplied by the value of `smoothness_bound_multiplier`.
|
49
|
+
- `batch_size_multiplier` (default value: `8.0`): Each `1.0` increment of the multiplier is 2 cycles of gear and wheel factorization, alternating every other cycle between bottom of guessing range and top of guessing range, for every thread in use.
|
52
50
|
|
53
51
|
All variable defaults can also be controlled by environment variables:
|
54
|
-
- `
|
52
|
+
- `FINDAFACTOR_METHOD` (integer value)
|
55
53
|
- `FINDAFACTOR_NODE_COUNT`
|
56
54
|
- `FINDAFACTOR_NODE_ID`
|
57
55
|
- `FINDAFACTOR_TRIAL_DIVISION_LEVEL`
|
@@ -2,17 +2,13 @@
|
|
2
2
|
requires = [
|
3
3
|
"cmake",
|
4
4
|
"pybind11",
|
5
|
-
"setuptools"
|
6
|
-
"scikit-build"
|
5
|
+
"setuptools"
|
7
6
|
]
|
8
|
-
build-backend = "
|
9
|
-
|
10
|
-
[tool.scikit-build]
|
11
|
-
cmake.args = ["-DCMAKE_BUILD_TYPE=Release"]
|
7
|
+
build-backend = "setuptools.build_meta"
|
12
8
|
|
13
9
|
[project]
|
14
10
|
name = "FindAFactor"
|
15
|
-
version = "
|
11
|
+
version = "4.1.0"
|
16
12
|
requires-python = ">=3.8"
|
17
13
|
description = "Find any nontrivial factor of a number"
|
18
14
|
readme = {file = "README.txt", content-type = "text/markdown"}
|
@@ -1 +0,0 @@
|
|
1
|
-
from .find_a_factor import find_a_factor
|
@@ -1,386 +0,0 @@
|
|
1
|
-
//////////////////////////////////////////////////////////////////////////////////////
|
2
|
-
//
|
3
|
-
// (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
|
4
|
-
//
|
5
|
-
// This is a multithreaded, universal quantum register simulation, allowing
|
6
|
-
// (nonphysical) register cloning and direct measurement of probability and
|
7
|
-
// phase, to leverage what advantages classical emulation of qubits can have.
|
8
|
-
//
|
9
|
-
// Licensed under the GNU Lesser General Public License V3.
|
10
|
-
// See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
|
11
|
-
// for details.
|
12
|
-
|
13
|
-
#include "oclengine.hpp"
|
14
|
-
|
15
|
-
#include <algorithm>
|
16
|
-
#include <iostream>
|
17
|
-
#include <regex>
|
18
|
-
#include <sstream>
|
19
|
-
|
20
|
-
namespace Qimcifa {
|
21
|
-
|
22
|
-
/// "Qrack::OCLEngine" manages the single OpenCL context
|
23
|
-
|
24
|
-
// Public singleton methods to get pointers to various methods
|
25
|
-
DeviceContextPtr OCLEngine::GetDeviceContextPtr(const int64_t& dev)
|
26
|
-
{
|
27
|
-
if ((dev >= GetDeviceCount()) || (dev < -1) || (dev >= ((int64_t)all_device_contexts.size()))) {
|
28
|
-
throw std::invalid_argument("Invalid OpenCL device selection");
|
29
|
-
} else if (dev == -1) {
|
30
|
-
return default_device_context;
|
31
|
-
} else {
|
32
|
-
return all_device_contexts[dev];
|
33
|
-
}
|
34
|
-
}
|
35
|
-
|
36
|
-
// clang-format off
|
37
|
-
const std::vector<OCLKernelHandle> OCLEngine::kernelHandles{
|
38
|
-
OCLKernelHandle(OCL_API_FACTORIZE_SMOOTH, "factorize")
|
39
|
-
};
|
40
|
-
// clang-format on
|
41
|
-
|
42
|
-
const std::string OCLEngine::binary_file_prefix("ocl_dev_");
|
43
|
-
const std::string OCLEngine::binary_file_ext(".ir");
|
44
|
-
|
45
|
-
std::vector<DeviceContextPtr> OCLEngine::GetDeviceContextPtrVector() { return all_device_contexts; }
|
46
|
-
void OCLEngine::SetDeviceContextPtrVector(std::vector<DeviceContextPtr> vec, DeviceContextPtr dcp)
|
47
|
-
{
|
48
|
-
all_device_contexts = vec;
|
49
|
-
if (dcp != nullptr) {
|
50
|
-
default_device_context = dcp;
|
51
|
-
}
|
52
|
-
}
|
53
|
-
|
54
|
-
void OCLEngine::SetDefaultDeviceContext(DeviceContextPtr dcp) { default_device_context = dcp; }
|
55
|
-
|
56
|
-
cl::Program OCLEngine::MakeProgram(const size_t bitPow, std::shared_ptr<OCLDeviceContext> devCntxt)
|
57
|
-
{
|
58
|
-
// Load and build kernel
|
59
|
-
std::string kernelSourceStr =
|
60
|
-
"#define BCAPPOW " + std::to_string(bitPow) + "\n" +
|
61
|
-
"#define BIG_INTEGER_WORD_BITS 64U\n" +
|
62
|
-
"#define BIG_INTEGER_WORD_POWER 6U\n" +
|
63
|
-
"#define BIG_INTEGER_WORD ulong\n" +
|
64
|
-
"#define BIG_INTEGER_HALF_WORD uint\n" +
|
65
|
-
"#define BIG_INTEGER_HALF_WORD_MASK 0xFFFFFFFFULL\n" +
|
66
|
-
"#define BIG_INTEGER_HALF_WORD_MASK_NOT 0xFFFFFFFF00000000ULL\n" +
|
67
|
-
"\n" +
|
68
|
-
"// This can be any power of 2 greater than (or equal to) 64:\n" +
|
69
|
-
"const size_t BIG_INTEGER_BITS = (1 << BCAPPOW);\n" +
|
70
|
-
"const int BIG_INTEGER_WORD_SIZE = BIG_INTEGER_BITS / BIG_INTEGER_WORD_BITS;\n" +
|
71
|
-
"\n" +
|
72
|
-
"// The rest of the constants need to be consistent with the one above:\n" +
|
73
|
-
"const size_t BIG_INTEGER_HALF_WORD_BITS = BIG_INTEGER_WORD_BITS >> 1U;\n" +
|
74
|
-
"const int BIG_INTEGER_HALF_WORD_SIZE = BIG_INTEGER_WORD_SIZE << 1U;\n" +
|
75
|
-
"const int BIG_INTEGER_MAX_WORD_INDEX = BIG_INTEGER_WORD_SIZE - 1U;\n" +
|
76
|
-
"\n" +
|
77
|
-
"typedef struct BigInteger {\n" +
|
78
|
-
" BIG_INTEGER_WORD bits[BIG_INTEGER_WORD_SIZE];\n" +
|
79
|
-
"} BigInteger;\n" +
|
80
|
-
"\n" +
|
81
|
-
"inline void set(const BigInteger* o, BigInteger* n)\n" +
|
82
|
-
"{\n" +
|
83
|
-
" for (int i = 0; i < BIG_INTEGER_WORD_SIZE; ++i) {\n" +
|
84
|
-
" n->bits[i] = o->bits[i];\n" +
|
85
|
-
" }\n" +
|
86
|
-
"}\n" +
|
87
|
-
"\n" +
|
88
|
-
"inline void set_0(BigInteger* n)\n" +
|
89
|
-
"{\n" +
|
90
|
-
" for (int i = 0; i < BIG_INTEGER_WORD_SIZE; ++i) {\n" +
|
91
|
-
" n->bits[i] = 0U;\n" +
|
92
|
-
" }\n" +
|
93
|
-
"}\n" +
|
94
|
-
"\n" +
|
95
|
-
"inline void xor_bit(const BIG_INTEGER_HALF_WORD b, BigInteger* o) {\n" +
|
96
|
-
" o->bits[b % BIG_INTEGER_WORD_BITS] ^= (1ULL << (b / BIG_INTEGER_WORD_BITS));\n" +
|
97
|
-
"}\n" +
|
98
|
-
"inline int bi_compare_1(const BigInteger* left)\n" +
|
99
|
-
"{\n" +
|
100
|
-
" for (int i = BIG_INTEGER_MAX_WORD_INDEX; i > 0; --i) {\n" +
|
101
|
-
" if (left->bits[i]) {\n" +
|
102
|
-
" return 1;\n" +
|
103
|
-
" }\n" +
|
104
|
-
" }\n" +
|
105
|
-
" if (left->bits[0] > 1U) {\n" +
|
106
|
-
" return 1;\n" +
|
107
|
-
" }\n" +
|
108
|
-
" if (left->bits[0] < 1U) {\n" +
|
109
|
-
" return -1;\n" +
|
110
|
-
" }\n" +
|
111
|
-
"\n" +
|
112
|
-
" return 0;\n" +
|
113
|
-
"}\n" +
|
114
|
-
"\n" +
|
115
|
-
"// \"Schoolbook division\" (on half words)\n" +
|
116
|
-
"// Complexity - O(x^2)\n" +
|
117
|
-
"void bi_div_mod_small(\n" +
|
118
|
-
" const BigInteger* left, BIG_INTEGER_HALF_WORD right, BigInteger* quotient, BIG_INTEGER_HALF_WORD* rmndr)\n" +
|
119
|
-
"{\n" +
|
120
|
-
" BIG_INTEGER_WORD carry = 0U;\n" +
|
121
|
-
" if (quotient) {\n" +
|
122
|
-
" set_0(quotient);\n" +
|
123
|
-
" for (int i = BIG_INTEGER_HALF_WORD_SIZE - 1; i >= 0; --i) {\n" +
|
124
|
-
" const int i2 = i >> 1;\n" +
|
125
|
-
" carry <<= BIG_INTEGER_HALF_WORD_BITS;\n" +
|
126
|
-
" if (i & 1) {\n" +
|
127
|
-
" carry |= left->bits[i2] >> BIG_INTEGER_HALF_WORD_BITS;\n" +
|
128
|
-
" quotient->bits[i2] |= (carry / right) << BIG_INTEGER_HALF_WORD_BITS;\n" +
|
129
|
-
" } else {\n" +
|
130
|
-
" carry |= left->bits[i2] & BIG_INTEGER_HALF_WORD_MASK;\n" +
|
131
|
-
" quotient->bits[i2] |= (carry / right);\n" +
|
132
|
-
" }\n" +
|
133
|
-
" carry %= right;\n" +
|
134
|
-
" }\n" +
|
135
|
-
" } else {\n" +
|
136
|
-
" for (int i = BIG_INTEGER_HALF_WORD_SIZE - 1; i >= 0; --i) {\n" +
|
137
|
-
" const int i2 = i >> 1;\n" +
|
138
|
-
" carry <<= BIG_INTEGER_HALF_WORD_BITS;\n" +
|
139
|
-
" if (i & 1) {\n" +
|
140
|
-
" carry |= left->bits[i2] >> BIG_INTEGER_HALF_WORD_BITS;\n" +
|
141
|
-
" } else {\n" +
|
142
|
-
" carry |= left->bits[i2] & BIG_INTEGER_HALF_WORD_MASK;\n" +
|
143
|
-
" }\n" +
|
144
|
-
" carry %= right;\n" +
|
145
|
-
" }\n" +
|
146
|
-
" }\n" +
|
147
|
-
"\n" +
|
148
|
-
" *rmndr = carry;\n" +
|
149
|
-
"}\n" +
|
150
|
-
"\n" +
|
151
|
-
"__kernel void factorize(\n" +
|
152
|
-
" __global const BigInteger *numbers, // Array of numbers to check\n" +
|
153
|
-
" __global const int *primes, // Array of small primes for smoothness\n" +
|
154
|
-
" __global bool *results, // Output: 1 if smooth, 0 if not\n" +
|
155
|
-
" __global const BigInteger *factor_vectors, // Output: Factorization vectors as bitmasks\n" +
|
156
|
-
" const int primeCount // Number of primes in the array\n" +
|
157
|
-
") {\n" +
|
158
|
-
" int gid = get_global_id(0); // Get the index of this work item\n" +
|
159
|
-
" BigInteger number, factor_vector, q;\n" +
|
160
|
-
" set(&numbers[gid], &number); // The number to check\n" +
|
161
|
-
" set_0(&factor_vector); // Initialize the factor vector as 0\n" +
|
162
|
-
"\n" +
|
163
|
-
" // Test divisibility by each prime\n" +
|
164
|
-
" for (int i = 0; i < primeCount; ++i) {\n" +
|
165
|
-
" const uint p = (uint)primes[i];\n" +
|
166
|
-
" do {\n" +
|
167
|
-
" unsigned int r = 0U;\n" +
|
168
|
-
" bi_div_mod_small(&number, p, &q, &r);\n" +
|
169
|
-
" if (r) {\n" +
|
170
|
-
" break;"
|
171
|
-
" }\n" +
|
172
|
-
" set(&q, &number);\n" +
|
173
|
-
" xor_bit(i, &factor_vector); // Flip the corresponding bit\n" +
|
174
|
-
" } while (true);\n" +
|
175
|
-
" }\n" +
|
176
|
-
"\n" +
|
177
|
-
" // If number is reduced to 1, it is smooth\n" +
|
178
|
-
" results[gid] = bi_compare_1(&number) == 0;\n" +
|
179
|
-
"\n" +
|
180
|
-
" // Store the factor vector\n" +
|
181
|
-
" set(&factor_vector, &(factor_vectors[gid]));\n" +
|
182
|
-
"}\n";
|
183
|
-
|
184
|
-
cl::Program::Sources sources;
|
185
|
-
sources.push_back({(const char*)kernelSourceStr.c_str(), (long unsigned int)(kernelSourceStr.size() + 1U) });
|
186
|
-
|
187
|
-
cl::Program program = cl::Program(devCntxt->context, sources);
|
188
|
-
std::cout << "Building JIT." << std::endl;
|
189
|
-
|
190
|
-
return program;
|
191
|
-
}
|
192
|
-
|
193
|
-
/**
 * Write the first non-empty compiled binary of `program` to the file `path + fileName`.
 *
 * The directory `path` is created if possible (an already-existing directory is not an error).
 * Nothing is written, and a warning is printed, if the program exposes no non-empty binary or
 * if the output file cannot be opened.
 *
 * @param program  OpenCL program whose device binaries are queried.
 * @param path     Directory to write into; concatenated directly with `fileName`, so it is
 *                 expected to end with a path separator.
 * @param fileName Name of the output file inside `path`.
 */
void OCLEngine::SaveBinary(cl::Program program, std::string path, std::string fileName)
{
    // Locate the first device slot that actually produced a binary.
    const std::vector<size_t> clBinSizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>();
    size_t clBinSize = 0U;
    size_t clBinIndex = 0U;
    for (size_t i = 0U; i < clBinSizes.size(); ++i) {
        if (clBinSizes[i]) {
            clBinSize = clBinSizes[i];
            clBinIndex = i;
            break;
        }
    }

    std::cout << "Binary size:" << clBinSize << std::endl;

#if defined(_WIN32) && !defined(__CYGWIN__)
    int err = _mkdir(path.c_str());
#else
    int err = mkdir(path.c_str(), 0700);
#endif
    if (err != -1) {
        std::cout << "Making directory: " << path << std::endl;
    }

    if (!clBinSize) {
        // No device produced a binary; indexing the binaries list below would be invalid.
        std::cout << "WARNING: No OpenCL program binary available to save." << std::endl;
        return;
    }

    const std::vector<std::vector<unsigned char>> clBinaries = program.getInfo<CL_PROGRAM_BINARIES>();
    if ((clBinIndex >= clBinaries.size()) || (clBinaries[clBinIndex].size() < clBinSize)) {
        std::cout << "WARNING: OpenCL program binary is smaller than its reported size; not saving." << std::endl;
        return;
    }
    // Bind by reference: the binary can be large and does not need to be copied.
    const std::vector<unsigned char>& clBinary = clBinaries[clBinIndex];

    // "wb": the payload is raw bytes, so text-mode newline translation must be disabled.
    const std::string fullPath = path + fileName;
    FILE* clBinFile = fopen(fullPath.c_str(), "wb");
    if (!clBinFile) {
        std::cout << "WARNING: Could not open " << fullPath << " for writing." << std::endl;
        return;
    }

    const size_t written = fwrite(clBinary.data(), sizeof(unsigned char), clBinSize, clBinFile);
    if (written != clBinSize) {
        std::cout << "WARNING: Incomplete write of OpenCL program binary to " << fullPath << std::endl;
    }
    fclose(clBinFile);
}
|
224
|
-
|
225
|
-
/**
 * Enumerate every OpenCL platform and device, build the kernel program for each device, and
 * select a default device.
 *
 * The default device is the highest-index device unless the environment variable
 * FINDAFACTOR_OCL_DEFAULT_DEVICE names a valid device index. If the program cannot be built for
 * the chosen default, the first device that did build becomes the default instead. Devices whose
 * build fails are reported and left out of the returned context list.
 *
 * @param bitPow      Forwarded to MakeProgram(); must be non-zero.
 * @param maxAllocVec Per-device value forwarded to each OCLDeviceContext, cycled if shorter than
 *                    the device count.
 * @return The built device contexts plus the default one, or a default-constructed InitOClResult
 *         when no platform, no device, or no buildable device is found.
 * @throws std::runtime_error if `bitPow` is 0.
 */
InitOClResult OCLEngine::InitOCL(const size_t bitPow, std::vector<int64_t> maxAllocVec)
{
    if (!bitPow) {
        throw std::runtime_error("Cannot InitOCL with default of 0 bits!");
    }

    // get all platforms (drivers), e.g. NVIDIA
    std::vector<cl::Platform> all_platforms;
    std::vector<cl::Device> all_devices;
    std::vector<int64_t> device_platform_id;
    cl::Platform default_platform;
    cl::Device default_device;
    std::vector<DeviceContextPtr> all_dev_contexts;
    DeviceContextPtr default_dev_context;

    cl::Platform::get(&all_platforms);

    if (all_platforms.empty()) {
        std::cout << " No platforms found. Check OpenCL installation!\n";
        return InitOClResult();
    }

    // get all devices
    std::vector<cl::Platform> devPlatVec;
    std::vector<std::vector<cl::Device>> all_platforms_devices;
    std::vector<bool> all_devices_is_gpu;
    std::vector<bool> all_devices_is_cpu;
    for (size_t i = 0U; i < all_platforms.size(); ++i) {
        all_platforms_devices.push_back(std::vector<cl::Device>());
        all_platforms[i].getDevices(CL_DEVICE_TYPE_ALL, &(all_platforms_devices[i]));
        // Only valid for this iteration: the next push_back may reallocate the outer vector.
        const std::vector<cl::Device>& platform_devices = all_platforms_devices[i];

        for (size_t j = 0U; j < platform_devices.size(); ++j) {
            // VirtualCL seems to break if the assignment constructor of cl::Platform is used here from the original
            // list. Assigning the object from a new query is always fine, though. (They carry the same underlying
            // platform IDs.)
            std::vector<cl::Platform> temp_platforms;
            cl::Platform::get(&temp_platforms);
            devPlatVec.push_back(temp_platforms[i]);
            device_platform_id.push_back(i);
        }
        all_devices.insert(all_devices.end(), platform_devices.begin(), platform_devices.end());

        // Linux implements `cl::Device` relation operators, including equality, but Mac considers OpenCL "deprecated,"
        // and other compilers might not see a strict need in OpenCL implementation standard for a `cl::Device` equality
        // operator, which would allow the use of `std::find()`. Devices are therefore matched by name, assuming all
        // devices with the same name are identical vendor, line, and model.
        const auto flagDevicesOfType = [&](cl_device_type type) -> std::vector<bool> {
            std::vector<cl::Device> typed_devices;
            all_platforms[i].getDevices(type, &typed_devices);
            std::vector<bool> flags(platform_devices.size(), false);
            for (size_t j = 0U; j < typed_devices.size(); ++j) {
                const std::string typedName(typed_devices[j].getInfo<CL_DEVICE_NAME>());
                for (size_t k = 0U; k < platform_devices.size(); ++k) {
                    // Compare against device k (the slot being flagged), not device j.
                    if (typedName == platform_devices[k].getInfo<CL_DEVICE_NAME>()) {
                        flags[k] = true;
                    }
                }
            }
            return flags;
        };

        const std::vector<bool> gpu_to_insert = flagDevicesOfType(CL_DEVICE_TYPE_GPU);
        all_devices_is_gpu.insert(all_devices_is_gpu.end(), gpu_to_insert.begin(), gpu_to_insert.end());

        const std::vector<bool> cpu_to_insert = flagDevicesOfType(CL_DEVICE_TYPE_CPU);
        all_devices_is_cpu.insert(all_devices_is_cpu.end(), cpu_to_insert.begin(), cpu_to_insert.end());
    }
    if (all_devices.empty()) {
        std::cout << " No devices found. Check OpenCL installation!\n";
        return InitOClResult();
    }

    const int64_t deviceCount = all_devices.size();
    // prefer the last device because that's usually a GPU or accelerator; device[0U] is usually the CPU
    int64_t dev = deviceCount - 1;
    if (const char* devEnv = getenv("FINDAFACTOR_OCL_DEFAULT_DEVICE")) {
        bool isParsed = true;
        try {
            dev = std::stoi(std::string(devEnv));
        } catch (const std::exception&) {
            // Non-numeric or out-of-range text is treated like any other invalid selection.
            isParsed = false;
        }
        if (!isParsed || (dev < 0) || (dev > (deviceCount - 1))) {
            std::cout << "WARNING: Invalid FINDAFACTOR_OCL_DEFAULT_DEVICE selection. (Falling back to highest index device "
                         "as default.)"
                      << std::endl;
            dev = deviceCount - 1;
        }
    }

    // create the programs that we want to execute on the devices
    int64_t plat_id = -1;
    // Index (into all_devices) and platform of the first device that built successfully; used as
    // the fallback default when the preferred device fails to build.
    int64_t first_built_dev = -1;
    int64_t first_built_plat = -1;
    std::vector<cl::Context> all_contexts;
    for (int64_t i = 0; i < deviceCount; ++i) {
        // a context is like a "runtime link" to the device and platform;
        // i.e. communication is possible
        if (device_platform_id[i] != plat_id) {
            plat_id = device_platform_id[i];
            all_contexts.push_back(cl::Context(all_platforms_devices[plat_id]));
        }
        const std::string devName(all_devices[i].getInfo<CL_DEVICE_NAME>());
        const bool useHostRam = all_devices_is_cpu[i] || (devName.find("Intel(R) UHD") != std::string::npos) ||
            (devName.find("Iris") != std::string::npos);
        // Guard the modulus against an empty vector. -1 mirrors the single default entry the
        // OCLEngine constructor passes; NOTE(review): confirm -1 is the intended "no limit" value.
        const int64_t maxAlloc = maxAllocVec.empty() ? -1 : maxAllocVec[i % maxAllocVec.size()];
        DeviceContextPtr devCntxt = std::make_shared<OCLDeviceContext>(devPlatVec[i], all_devices[i],
            all_contexts.back(), i, plat_id, maxAlloc, all_devices_is_gpu[i], all_devices_is_cpu[i], useHostRam);

        std::cout << "Device #" << i << ", ";
        cl::Program program = MakeProgram(bitPow, devCntxt);

        cl_int buildError =
            program.build({ all_devices[i] }, "-cl-strict-aliasing -cl-denorms-are-zero -cl-fast-relaxed-math");
        if (buildError != CL_SUCCESS) {
            std::cout << "Error building for device #" << i << ": " << buildError << ", "
                      << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(all_devices[i])
                      << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(all_devices[i]) << std::endl;

            // If this was the preferred default device, a fallback default is chosen after the
            // loop, once it is known which devices actually built.
            continue;
        }

        // Register kernels on this device's own context object. all_dev_contexts only holds
        // devices that built, so it must not be indexed by the device index i.
        for (unsigned int j = 0U; j < kernelHandles.size(); ++j) {
            devCntxt->calls[kernelHandles[j].oclapi] = cl::Kernel(program, kernelHandles[j].kernelname.c_str());
            devCntxt->mutexes.emplace(kernelHandles[j].oclapi, new std::mutex);
        }
        all_dev_contexts.push_back(devCntxt);

        if (first_built_dev < 0) {
            first_built_dev = i;
            first_built_plat = plat_id;
        }

        if (i == dev) {
            default_dev_context = devCntxt;
            default_platform = all_platforms[plat_id];
            default_device = all_devices[i];
        }
    }

    if (all_dev_contexts.empty()) {
        // Nothing compiled anywhere; the environment needs to be fixed by the user.
        std::cout << " No devices could build the OpenCL program. Check OpenCL installation!\n";
        return InitOClResult();
    }

    if (!default_dev_context) {
        // The preferred default failed to build: fall back to the first device that did build.
        dev = first_built_dev;
        default_dev_context = all_dev_contexts.front();
        default_platform = all_platforms[first_built_plat];
        default_device = all_devices[first_built_dev];
    }

    // For VirtualCL support, the device info can only be accessed AFTER all contexts are created.
    std::cout << "Default platform: " << default_platform.getInfo<CL_PLATFORM_NAME>() << "\n";
    std::cout << "Default device: #" << dev << ", " << default_device.getInfo<CL_DEVICE_NAME>() << "\n";
    for (int64_t i = 0; i < deviceCount; ++i) {
        std::cout << "OpenCL device #" << i << ": " << all_devices[i].getInfo<CL_DEVICE_NAME>() << "\n";
    }

    return InitOClResult(all_dev_contexts, default_dev_context);
}
|
377
|
-
|
378
|
-
/**
 * Construct the engine: initialize OpenCL for `bitPow`, publish the resulting device contexts,
 * and size the per-device allocation tracker to match.
 *
 * `maxActiveAllocSizes` starts as a single entry of -1, which is what InitOCL() receives.
 */
OCLEngine::OCLEngine(const size_t bitPow)
    : maxActiveAllocSizes(1U, -1)
{
    InitOClResult oclInit = InitOCL(bitPow, maxActiveAllocSizes);
    SetDeviceContextPtrVector(oclInit.all_dev_contexts, oclInit.default_dev_context);

    // One value-initialized slot per device context that InitOCL() returned.
    const size_t contextCount = oclInit.all_dev_contexts.size();
    activeAllocSizes = std::vector<size_t>(contextCount);
}
|
385
|
-
|
386
|
-
} // namespace Qrack
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|