@engine9-io/input-tools 1.6.0 → 1.6.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/file/FileUtilities.js +27 -11
- package/package.json +1 -1
package/file/FileUtilities.js
CHANGED
@@ -824,6 +824,8 @@ Worker.prototype.getUniqueSet = async function (options) {
|
|
824
824
|
};
|
825
825
|
|
826
826
|
Worker.prototype.getUniqueStream = async function (options) {
|
827
|
+
const includeDuplicateSourceRecords = bool(options.includeDuplicateSourceRecords, false);
|
828
|
+
|
827
829
|
const { uniqueSet, uniqueFunction, sample } = await this.getUniqueSet({
|
828
830
|
filenames: options.existingFiles,
|
829
831
|
uniqueFunction: options.uniqueFunction,
|
@@ -844,7 +846,10 @@ Worker.prototype.getUniqueStream = async function (options) {
|
|
844
846
|
// do nothing
|
845
847
|
cb();
|
846
848
|
} else {
|
847
|
-
|
849
|
+
if (!includeDuplicateSourceRecords) {
|
850
|
+
// add it to the set for the next time
|
851
|
+
uniqueSet.add(v);
|
852
|
+
}
|
848
853
|
cb(null, d);
|
849
854
|
}
|
850
855
|
},
|
@@ -857,8 +862,11 @@ Worker.prototype.getUniqueStream.metadata = {
|
|
857
862
|
options: {
|
858
863
|
existingFiles: {},
|
859
864
|
uniqueFunction: {},
|
860
|
-
filename: {},
|
861
|
-
stream: {},
|
865
|
+
filename: { description: 'Specify a source filename or a stream' },
|
866
|
+
stream: { description: 'Specify a source filename or a stream' },
|
867
|
+
includeDuplicateSourceRecords: {
|
868
|
+
description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
|
869
|
+
},
|
862
870
|
},
|
863
871
|
};
|
864
872
|
Worker.prototype.getUniqueFile = async function (options) {
|
@@ -871,8 +879,11 @@ Worker.prototype.getUniqueFile.metadata = {
|
|
871
879
|
options: {
|
872
880
|
existingFiles: {},
|
873
881
|
uniqueFunction: {},
|
874
|
-
filename: {},
|
875
|
-
stream: {},
|
882
|
+
filename: { description: 'Specify a source filename or a stream' },
|
883
|
+
stream: { description: 'Specify a source filename or a stream' },
|
884
|
+
includeDuplicateSourceRecords: {
|
885
|
+
description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
|
886
|
+
},
|
876
887
|
},
|
877
888
|
};
|
878
889
|
|
@@ -882,7 +893,7 @@ Requires 2 passes of the files,
|
|
882
893
|
but that's a better tradeoff than trying to store huge files in memory
|
883
894
|
*/
|
884
895
|
Worker.prototype.diff = async function ({
|
885
|
-
fileA, fileB, uniqueFunction: ufOpt, fields,
|
896
|
+
fileA, fileB, uniqueFunction: ufOpt, fields, includeDuplicateSourceRecords,
|
886
897
|
}) {
|
887
898
|
if (ufOpt && fields) throw new Error('fields and uniqueFunction cannot both be specified');
|
888
899
|
let uniqueFunction = ufOpt;
|
@@ -891,17 +902,19 @@ Worker.prototype.diff = async function ({
|
|
891
902
|
uniqueFunction = (o) => farr.map((f) => o[f] || '').join('.');
|
892
903
|
}
|
893
904
|
|
905
|
+
const left = await this.getUniqueFile({
|
906
|
+
existingFiles: [fileB],
|
907
|
+
filename: fileA,
|
908
|
+
uniqueFunction,
|
909
|
+
includeDuplicateSourceRecords,
|
910
|
+
});
|
894
911
|
const right = await this.getUniqueFile({
|
895
912
|
existingFiles: [fileA],
|
896
913
|
filename: fileB,
|
897
914
|
uniqueFunction,
|
915
|
+
includeDuplicateSourceRecords,
|
898
916
|
});
|
899
917
|
|
900
|
-
const left = await this.getUniqueFile({
|
901
|
-
existingFiles: [fileB],
|
902
|
-
filename: fileA,
|
903
|
-
uniqueFunction,
|
904
|
-
});
|
905
918
|
return {
|
906
919
|
left, right,
|
907
920
|
};
|
@@ -912,6 +925,9 @@ Worker.prototype.diff.metadata = {
|
|
912
925
|
fileB: {},
|
913
926
|
fields: { description: 'Fields to use for uniqueness -- aka primary key. Defaults to JSON of line' },
|
914
927
|
uniqueFunction: {},
|
928
|
+
includeDuplicateSourceRecords: {
|
929
|
+
description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
|
930
|
+
},
|
915
931
|
},
|
916
932
|
};
|
917
933
|
|