midas-edge 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,25 @@
1
module Midas
  # Front end that stores the MIDAS settings and hands edge data to the
  # scoring routines.
  #
  # The actual scoring is done by +_fit_predict_file+ and +_fit_predict_str+,
  # which are not defined in this file (presumably provided by the gem's
  # native extension -- confirm against the extension source).
  class Detector
    # Stores the settings; each one is forwarded unchanged on every call to
    # #fit_predict.
    #
    # rows::      default 2 (the bundled README calls this the number of hash functions)
    # buckets::   default 769 (README: number of buckets)
    # alpha::     default 0.6 (README: temporal decay factor)
    # relations:: default true (README: +--norelations+ selects MIDAS instead of MIDAS-R)
    # directed::  default true (README: +--undirected+ treats the graph as undirected)
    def initialize(rows: 2, buckets: 769, alpha: 0.6, relations: true, directed: true)
      @rows = rows
      @buckets = buckets
      @alpha = alpha
      @relations = relations
      @directed = directed
    end

    # Scores the edges described by +x+ and returns the scores as a
    # Numo::DFloat decoded from the binary string the scoring routine returns.
    #
    # x:: a String, which is passed to +_fit_predict_file+ (presumably a file
    #     path -- confirm), or any other object, which is converted to a
    #     Numo::Int32 array and must be two-dimensional with three columns.
    #
    # Raises ArgumentError ("Bad shape: ...") when the converted array is not
    # two-dimensional or its second dimension is not 3.
    def fit_predict(x)
      result =
        if x.is_a?(String)
          _fit_predict_file(x, @rows, @buckets, @alpha, @relations, @directed)
        else
          # Plain Ruby data is cast directly; existing Numo arrays of another
          # element type are converted to Int32 before serialisation.
          x = Numo::Int32.cast(x) unless x.is_a?(Numo::NArray)
          x = x.cast_to(Numo::Int32) unless x.is_a?(Numo::Int32)
          raise ArgumentError, "Bad shape: #{x.shape}" unless x.rank == 2 && x.shape[1] == 3
          _fit_predict_str(x.to_binary, @rows, @buckets, @alpha, @relations, @directed)
        end

      Numo::DFloat.from_binary(result)
    end
  end
end
Binary file
@@ -0,0 +1,3 @@
1
module Midas
  # Release identifier of this gem.
  VERSION = "0.1.0"
end
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
@@ -0,0 +1,66 @@
1
+ # MIDAS
2
+ [![Conference](http://img.shields.io/badge/AAAI-2020-red.svg)](https://aaai.org/Conferences/AAAI-20/)
3
+ [![Paper](http://img.shields.io/badge/Paper-pdf-brightgreen.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf)
4
+ [![Poster](http://img.shields.io/badge/Poster-pdf-blueviolet.svg)](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midasposter.pdf)
5
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/bhatiasiddharth/MIDAS/blob/master/LICENSE)
6
+
7
+ <p align="center">
8
+ <img align="center" src="https://www.comp.nus.edu.sg/~sbhatia/assets/img/midasstream.png" alt="...">
9
+ </p>
10
+
11
+
12
+ Anomaly detection in graphs is a critical problem for finding suspicious behavior in innumerable systems, such as intrusion detection, fake ratings, and financial fraud. This has been a well-researched problem, with the majority of the proposed approaches focusing on static graphs. However, many real-world graphs are dynamic in nature, and methods based on static connections may miss temporal characteristics of the graphs and anomalies.
13
+
14
+ Among the methods focusing on dynamic graphs, most of them have edges aggregated into graph snapshots. However, to minimize the effect of malicious activities and start recovery as soon as possible, we need to detect anomalies in real-time or near real-time i.e. to identify whether an incoming edge is anomalous or not, as soon as we receive it. In addition, since the number of vertices can increase as we process the stream of edges, we need an algorithm which uses constant memory in graph size. Moreover, fraudulent or anomalous events in many applications occur in microclusters or suddenly arriving groups of suspiciously similar edges e.g. denial of service attacks in network traffic data and lockstep behavior.
15
+
16
+ In this work, we propose MIDAS, short for Microcluster-Based Detector of Anomalies in Edge Streams, which detects microcluster anomalies, or suddenly arriving groups of suspiciously similar edges, in edge streams, using constant time and memory. In addition, by using a principled hypothesis testing framework, MIDAS provides theoretical bounds on the false positive probability, which earlier methods do not provide. Also, we are up to 48% more accurate while being up to 644 times faster than state-of-the-art approaches.
17
+
18
+ For more details, please read the paper - [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf). *Siddharth Bhatia, Bryan Hooi, Minji Yoon, Kijung Shin, Christos Faloutsos*. AAAI 2020.
19
+
20
+
21
+ ## Getting started
22
+ 1. Run `make` to compile code and create the binary.
23
+ 2. Run `./midas -i ` followed by the input file path and name.
24
+ 3. Run `make clean` to clean binaries.
25
+
26
+
27
+ ## Demo
28
+ 1. Run `./demo.sh` to compile the code and run it on an example dataset.
29
+
30
+
31
+ ## Command line options
32
+ * `-h --help`: produce help message
33
+ * `-i --input`: input file name
34
+ * `-o --output`: output file name (default: scores.txt)
35
+ * `-r --rows`: Number of Hash Functions (default: 2)
36
+ * `-b --buckets`: Number of Buckets (default: 769)
37
+ * `-a --alpha`: Temporal Decay Factor (default: 0.6)
38
+ * `--norelations` : Run MIDAS instead of MIDAS-R
39
+ * `--undirected` : Treat graph as undirected instead of directed
40
+
41
+
42
+ ## Input file format
43
+ MIDAS expects the input edge stream to be stored in a single file containing the following three columns in order:
44
+ 1. `source (int)`: source ID of the edge
45
+ 2. `destination (int)`: destination ID of the edge
46
+ 3. `time (int)`: time stamp of the edge
47
+
48
+ Thus, each line represents an edge. Edges should be sorted in non-decreasing order of their time stamps, and the column delimiter should be a comma (`,`).
49
+
50
+
51
+ ## Datasets
52
+ 1. [DARPA](https://www.ll.mit.edu/r-d/datasets/1998-darpa-intrusion-detection-evaluation-dataset)
53
+ 2. [TwitterWorldCup2014](http://odds.cs.stonybrook.edu/twitterworldcup2014-dataset)
54
+ 3. [TwitterSecurity](http://odds.cs.stonybrook.edu/twittersecurity-dataset)
55
+
56
+ ## Citation
57
+ If you use this code for your research, please consider citing our paper.
58
+
59
+ ```
60
+ @article{bhatia2019midas,
61
+ title={MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams},
62
+ author={Bhatia, Siddharth and Hooi, Bryan and Yoon, Minji and Shin, Kijung and Faloutsos, Christos},
63
+ journal={arXiv preprint arXiv:1911.04464},
64
+ year={2019}
65
+ }
66
+ ```
@@ -0,0 +1,88 @@
1
+ #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
2
+ #define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
3
+
4
+ #include <iostream>
5
+ #include <math.h>
6
+ #include <algorithm>
7
+ #include <vector>
8
+ #include "anom.hpp"
9
+ #include "edgehash.hpp"
10
+ #include "nodehash.hpp"
11
+
12
+ vector<double>* midas(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets)
13
+ {
14
+ int m = *max_element(src.begin(), src.end());
15
+ Edgehash cur_count(num_rows, num_buckets, m);
16
+ Edgehash total_count(num_rows, num_buckets, m);
17
+ vector<double>* anom_score = new vector<double>(src.size());
18
+ int cur_t = 1, size = src.size(), cur_src, cur_dst;
19
+ double cur_mean, sqerr, cur_score;
20
+ for (int i = 0; i < size; i++) {
21
+
22
+ if (i == 0 || times[i] > cur_t) {
23
+ cur_count.clear();
24
+ cur_t = times[i];
25
+ }
26
+
27
+ cur_src = src[i];
28
+ cur_dst = dst[i];
29
+ cur_count.insert(cur_src, cur_dst, 1);
30
+ total_count.insert(cur_src, cur_dst, 1);
31
+ cur_mean = total_count.get_count(cur_src, cur_dst) / cur_t;
32
+ sqerr = pow(cur_count.get_count(cur_src, cur_dst) - cur_mean, 2);
33
+ if (cur_t == 1) cur_score = 0;
34
+ else cur_score = sqerr / cur_mean + sqerr / (cur_mean * (cur_t - 1));
35
+ (*anom_score)[i] = cur_score;
36
+ }
37
+
38
+ return anom_score;
39
+ }
40
+
41
/**
 * Converts a (total, current) count pair into an anomaly score.
 *
 * The mean is tot / cur_t. Only the amount by which cur exceeds that mean
 * contributes; a current count at or below the mean scores 0.
 *
 * @param tot    count accumulated over the whole stream
 * @param cur    count attributed to the current tick
 * @param cur_t  current tick; the second term divides by max(1, cur_t - 1)
 * @return the score, 0 when cur does not exceed the mean
 */
double counts_to_anom(double tot, double cur, int cur_t)
{
    const double cur_mean = tot / cur_t;
    const double excess = (cur > cur_mean) ? cur - cur_mean : 0.0;

    // With no excess the score is 0. Returning early also avoids the 0/0
    // (NaN) the formula would otherwise produce when the mean is zero too.
    if (excess == 0.0) {
        return 0.0;
    }

    const double sqerr = excess * excess;
    const int elapsed = (cur_t > 1) ? cur_t - 1 : 1;
    return sqerr / cur_mean + sqerr / (cur_mean * elapsed);
}
47
+
48
+ vector<double>* midasR(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets, double factor)
49
+ {
50
+ int m = *max_element(src.begin(), src.end());
51
+ Edgehash cur_count(num_rows, num_buckets, m);
52
+ Edgehash total_count(num_rows, num_buckets, m);
53
+ Nodehash src_score(num_rows, num_buckets);
54
+ Nodehash dst_score(num_rows, num_buckets);
55
+ Nodehash src_total(num_rows, num_buckets);
56
+ Nodehash dst_total(num_rows, num_buckets);
57
+ vector<double>* anom_score = new vector<double>(src.size());
58
+ int cur_t = 1, size = src.size(), cur_src, cur_dst;
59
+ double cur_score, cur_score_src, cur_score_dst, combined_score;
60
+
61
+ for (int i = 0; i < size; i++) {
62
+
63
+ if (i == 0 || times[i] > cur_t) {
64
+ cur_count.lower(factor);
65
+ src_score.lower(factor);
66
+ dst_score.lower(factor);
67
+ cur_t = times[i];
68
+ }
69
+
70
+ cur_src = src[i];
71
+ cur_dst = dst[i];
72
+ cur_count.insert(cur_src, cur_dst, 1);
73
+ total_count.insert(cur_src, cur_dst, 1);
74
+ src_score.insert(cur_src, 1);
75
+ dst_score.insert(cur_dst, 1);
76
+ src_total.insert(cur_src, 1);
77
+ dst_total.insert(cur_dst, 1);
78
+ cur_score = counts_to_anom(total_count.get_count(cur_src, cur_dst), cur_count.get_count(cur_src, cur_dst), cur_t);
79
+ cur_score_src = counts_to_anom(src_total.get_count(cur_src), src_score.get_count(cur_src), cur_t);
80
+ cur_score_dst = counts_to_anom(dst_total.get_count(cur_dst), dst_score.get_count(cur_dst), cur_t);
81
+ //combined_score = MAX(cur_score_src, cur_score_dst) + cur_score;
82
+ //combined_score = cur_score_src + cur_score_dst + cur_score;
83
+ combined_score = MAX(MAX(cur_score_src, cur_score_dst), cur_score);
84
+ (*anom_score)[i] = log(1 + combined_score);
85
+ }
86
+
87
+ return anom_score;
88
+ }
@@ -0,0 +1,10 @@
1
#ifndef anom_hpp
#define anom_hpp

#include <vector>
// NOTE(review): a using-directive in a header leaks into every file that
// includes it. It is kept because the accompanying implementation spells
// `vector` and `max_element` unqualified and relies on this line.
using namespace std;

/**
 * Edge-only MIDAS scoring.
 *
 * src, dst and times are indexed in parallel, one entry per edge. The result
 * is allocated with `new` by the implementation; the caller is expected to
 * delete it.
 */
vector<double>* midas(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets);

/**
 * MIDAS-R scoring: same inputs as midas() plus `factor`, which the
 * implementation passes to lower() on its per-tick counters.
 *
 * The result is allocated with `new` by the implementation; the caller is
 * expected to delete it.
 */
vector<double>* midasR(vector<int>& src, vector<int>& dst, vector<int>& times, int num_rows, int num_buckets, double factor);

#endif /* anom_hpp */
@@ -0,0 +1,539 @@
1
+ /*
2
+ __ _ _ __ __ _ _ __ __ _ _ __ ___ ___
3
+ / _` | '__/ _` | '_ \ / _` | '__/ __|/ _ \ Argument Parser for Modern C++
4
+ | (_| | | | (_| | |_) | (_| | | \__ \ __/ http://github.com/p-ranav/argparse
5
+ \__,_|_| \__, | .__/ \__,_|_| |___/\___|
6
+ |___/|_|
7
+
8
+ Licensed under the MIT License <http://opensource.org/licenses/MIT>.
9
+ SPDX-License-Identifier: MIT
10
+ Copyright (c) 2019 Pranav Srinivas Kumar <pranav.srinivas.kumar@gmail.com>.
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ */
30
+ #pragma once
31
+ #include <algorithm>
32
+ #include <any>
33
+ #include <functional>
34
+ #include <iomanip>
35
+ #include <iostream>
36
+ #include <iterator>
37
+ #include <list>
38
+ #include <map>
39
+ #include <memory>
40
+ #include <numeric>
41
+ #include <sstream>
42
+ #include <stdexcept>
43
+ #include <string>
44
+ #include <type_traits>
45
+ #include <vector>
46
+
47
+ namespace argparse {
48
+
49
namespace details { // compile-time helpers for telling containers from scalars

// Tag type whose only job is to give the partial specialization below a place
// to name the expressions a container must support.
template <typename... Ts> struct is_container_helper {};

// Primary template: a type is not a container unless proven otherwise.
template <typename T, typename _ = void>
struct is_container : std::false_type {};

// std::string has value_type/begin/end/size but is treated as a single value.
template <> struct is_container<std::string> : std::false_type {};

// Selected only when T has a value_type and begin(), end() and size() are all
// well-formed; conditional_t<false, ..., void> always yields void, so this
// matches the primary template's defaulted second argument.
template <typename T>
struct is_container<
    T,
    std::conditional_t<false,
                       is_container_helper<typename T::value_type,
                                           decltype(std::declval<T>().begin()),
                                           decltype(std::declval<T>().end()),
                                           decltype(std::declval<T>().size())>,
                       void>> : public std::true_type {};

// inline (not static) so that every translation unit including this header
// shares one definition of the variable template.
template <typename T>
inline constexpr bool is_container_v = is_container<T>::value;

// Resolves to T only for container types; otherwise substitution fails.
template <typename T>
using enable_if_container = std::enable_if_t<is_container_v<T>, T>;

// Resolves to T only for non-container types; otherwise substitution fails.
template <typename T>
using enable_if_not_container = std::enable_if_t<!is_container_v<T>, T>;
} // namespace details
77
+
78
+ class Argument {
79
+ friend class ArgumentParser;
80
+
81
+ public:
82
+ Argument() = default;
83
+
84
+ template <typename... Args>
85
+ explicit Argument(Args... args)
86
+ : mNames({std::move(args)...}), mIsOptional((is_optional(args) || ...)) {
87
+ std::sort(
88
+ mNames.begin(), mNames.end(), [](const auto &lhs, const auto &rhs) {
89
+ return lhs.size() == rhs.size() ? lhs < rhs : lhs.size() < rhs.size();
90
+ });
91
+ }
92
+
93
+ Argument &help(std::string aHelp) {
94
+ mHelp = std::move(aHelp);
95
+ return *this;
96
+ }
97
+
98
+ Argument &default_value(std::any aDefaultValue) {
99
+ mDefaultValue = std::move(aDefaultValue);
100
+ return *this;
101
+ }
102
+
103
+ Argument &required() {
104
+ mIsRequired = true;
105
+ return *this;
106
+ }
107
+
108
+ Argument &implicit_value(std::any aImplicitValue) {
109
+ mImplicitValue = std::move(aImplicitValue);
110
+ mNumArgs = 0;
111
+ return *this;
112
+ }
113
+
114
+ Argument &action(std::function<std::any(const std::string &)> aAction) {
115
+ mAction = std::move(aAction);
116
+ return *this;
117
+ }
118
+
119
+ Argument &nargs(size_t aNumArgs) {
120
+ mNumArgs = aNumArgs;
121
+ return *this;
122
+ }
123
+
124
+ template <typename Iterator>
125
+ Iterator consume(Iterator start, Iterator end, std::string usedName = {}) {
126
+ if (mIsUsed) {
127
+ throw std::runtime_error("Duplicate argument");
128
+ }
129
+ mIsUsed = true;
130
+ mUsedName = std::move(usedName);
131
+ if (mNumArgs == 0) {
132
+ mValues.emplace_back(mImplicitValue);
133
+ return start;
134
+ } else if (mNumArgs <= static_cast<size_t>(std::distance(start, end))) {
135
+ end = std::next(start, mNumArgs);
136
+ if (std::any_of(start, end, Argument::is_optional)) {
137
+ throw std::runtime_error("optional argument in parameter sequence");
138
+ }
139
+ std::transform(start, end, std::back_inserter(mValues), mAction);
140
+ return end;
141
+ } else if (mDefaultValue.has_value()) {
142
+ return start;
143
+ } else {
144
+ throw std::runtime_error("Too few arguments");
145
+ }
146
+ }
147
+
148
+ /*
149
+ * @throws std::runtime_error if argument values are not valid
150
+ */
151
+ void validate() const {
152
+ if (mIsOptional) {
153
+ if (mIsUsed && mValues.size() != mNumArgs && !mDefaultValue.has_value()) {
154
+ std::stringstream stream;
155
+ stream << mUsedName << ": expected " << mNumArgs
156
+ << " argument(s). " << mValues.size() << " provided.";
157
+ throw std::runtime_error(stream.str());
158
+ } else {
159
+ // TODO: check if an implicit value was programmed for this argument
160
+ if (!mIsUsed && !mDefaultValue.has_value() && mIsRequired) {
161
+ std::stringstream stream;
162
+ stream << mNames[0] << ": required.";
163
+ throw std::runtime_error(stream.str());
164
+ }
165
+ if (mIsUsed && mIsRequired && mValues.size() == 0) {
166
+ std::stringstream stream;
167
+ stream << mUsedName << ": no value provided.";
168
+ throw std::runtime_error(stream.str());
169
+ }
170
+ }
171
+ } else {
172
+ if (mValues.size() != mNumArgs && !mDefaultValue.has_value()) {
173
+ std::stringstream stream;
174
+ stream << mUsedName << ": expected " << mNumArgs
175
+ << " argument(s). " << mValues.size() << " provided.";
176
+ throw std::runtime_error(stream.str());
177
+ }
178
+ }
179
+ }
180
+
181
+ size_t get_arguments_length() const {
182
+ return std::accumulate(std::begin(mNames), std::end(mNames), size_t(0),
183
+ [](const auto &sum, const auto &s) {
184
+ return sum + s.size() +
185
+ 1; // +1 for space between names
186
+ });
187
+ }
188
+
189
+ friend std::ostream &operator<<(std::ostream &stream,
190
+ const Argument &argument) {
191
+ std::stringstream nameStream;
192
+ std::copy(std::begin(argument.mNames), std::end(argument.mNames),
193
+ std::ostream_iterator<std::string>(nameStream, " "));
194
+ stream << nameStream.str() << "\t" << argument.mHelp;
195
+ if (argument.mIsRequired)
196
+ stream << "[Required]";
197
+ stream << "\n";
198
+ return stream;
199
+ }
200
+
201
+ template <typename T> bool operator!=(const T &aRhs) const {
202
+ return !(*this == aRhs);
203
+ }
204
+
205
+ /*
206
+ * Entry point for template non-container types
207
+ * @throws std::logic_error in case of incompatible types
208
+ */
209
+ template <typename T>
210
+ std::enable_if_t<!details::is_container_v<T>, bool> operator==(const T &aRhs) const {
211
+ return get<T>() == aRhs;
212
+ }
213
+
214
+ /*
215
+ * Template specialization for containers
216
+ * @throws std::logic_error in case of incompatible types
217
+ */
218
+ template <typename T>
219
+ std::enable_if_t<details::is_container_v<T>, bool> operator==(const T &aRhs) const {
220
+ using ValueType = typename T::value_type;
221
+ auto tLhs = get<T>();
222
+ if (tLhs.size() != aRhs.size())
223
+ return false;
224
+ else {
225
+ return std::equal(std::begin(tLhs), std::end(tLhs), std::begin(aRhs),
226
+ [](const auto &lhs, const auto &rhs) {
227
+ return std::any_cast<const ValueType &>(lhs) == rhs;
228
+ });
229
+ }
230
+ }
231
+
232
+ private:
233
+ static bool is_integer(const std::string &aValue) {
234
+ if (aValue.empty() ||
235
+ ((!isdigit(aValue[0])) && (aValue[0] != '-') && (aValue[0] != '+')))
236
+ return false;
237
+ char *tPtr;
238
+ strtol(aValue.c_str(), &tPtr, 10);
239
+ return (*tPtr == 0);
240
+ }
241
+
242
+ static bool is_float(const std::string &aValue) {
243
+ std::istringstream tStream(aValue);
244
+ float tFloat;
245
+ // noskipws considers leading whitespace invalid
246
+ tStream >> std::noskipws >> tFloat;
247
+ // Check the entire string was consumed
248
+ // and if either failbit or badbit is set
249
+ return tStream.eof() && !tStream.fail();
250
+ }
251
+
252
+ // If an argument starts with "-" or "--", then it's optional
253
+ static bool is_optional(const std::string &aName) {
254
+ return (!aName.empty() && aName[0] == '-' && !is_integer(aName) &&
255
+ !is_float(aName));
256
+ }
257
+
258
+ static bool is_positional(const std::string &aName) {
259
+ return !is_optional(aName);
260
+ }
261
+
262
+ /*
263
+ * Getter for template non-container types
264
+ * @throws std::logic_error in case of incompatible types
265
+ */
266
+ template <typename T> details::enable_if_not_container<T> get() const {
267
+ if (!mValues.empty()) {
268
+ return std::any_cast<T>(mValues.front());
269
+ }
270
+ if (mDefaultValue.has_value()) {
271
+ return std::any_cast<T>(mDefaultValue);
272
+ }
273
+ throw std::logic_error("No value provided");
274
+ }
275
+
276
+ /*
277
+ * Getter for container types
278
+ * @throws std::logic_error in case of incompatible types
279
+ */
280
+ template <typename CONTAINER> details::enable_if_container<CONTAINER> get() const {
281
+ using ValueType = typename CONTAINER::value_type;
282
+ CONTAINER tResult;
283
+ if (!mValues.empty()) {
284
+ std::transform(
285
+ std::begin(mValues), std::end(mValues), std::back_inserter(tResult),
286
+ [](const auto &value) { return std::any_cast<ValueType>(value); });
287
+ return tResult;
288
+ }
289
+ if (mDefaultValue.has_value()) {
290
+ const auto &tDefaultValues =
291
+ std::any_cast<const CONTAINER &>(mDefaultValue);
292
+ std::transform(std::begin(tDefaultValues), std::end(tDefaultValues),
293
+ std::back_inserter(tResult), [](const auto &value) {
294
+ return std::any_cast<ValueType>(value);
295
+ });
296
+ return tResult;
297
+ }
298
+ throw std::logic_error("No value provided");
299
+ }
300
+
301
+ std::vector<std::string> mNames;
302
+ std::string mUsedName;
303
+ std::string mHelp;
304
+ std::any mDefaultValue;
305
+ std::any mImplicitValue;
306
+ std::function<std::any(const std::string &)> mAction =
307
+ [](const std::string &aValue) { return aValue; };
308
+ std::vector<std::any> mValues;
309
+ std::vector<std::string> mRawValues;
310
+ size_t mNumArgs = 1;
311
+ bool mIsOptional = false;
312
+ bool mIsRequired = false;
313
+ bool mIsUsed = false; // relevant for optional arguments. True if used by user
314
+
315
+ public:
316
+ static constexpr auto mHelpOption = "-h";
317
+ static constexpr auto mHelpOptionLong = "--help";
318
+ };
319
+
320
+ class ArgumentParser {
321
+ public:
322
+ explicit ArgumentParser(std::string aProgramName = {})
323
+ : mProgramName(std::move(aProgramName)) {
324
+ add_argument(Argument::mHelpOption, Argument::mHelpOptionLong)
325
+ .help("show this help message and exit")
326
+ .nargs(0)
327
+ .default_value(false)
328
+ .implicit_value(true);
329
+ }
330
+
331
+ // Parameter packing
332
+ // Call add_argument with variadic number of string arguments
333
+ template <typename... Targs> Argument &add_argument(Targs... Fargs) {
334
+ std::shared_ptr<Argument> tArgument =
335
+ std::make_shared<Argument>(std::move(Fargs)...);
336
+
337
+ if (tArgument->mIsOptional)
338
+ mOptionalArguments.emplace_back(tArgument);
339
+ else
340
+ mPositionalArguments.emplace_back(tArgument);
341
+
342
+ for (const auto &mName : tArgument->mNames) {
343
+ mArgumentMap.insert_or_assign(mName, tArgument);
344
+ }
345
+ return *tArgument;
346
+ }
347
+
348
+ // Parameter packed add_parents method
349
+ // Accepts a variadic number of ArgumentParser objects
350
+ template <typename... Targs> void add_parents(Targs... Fargs) {
351
+ const auto tNewParentParsers = {Fargs...};
352
+ for (const auto &tParentParser : tNewParentParsers) {
353
+ const auto &tPositionalArguments = tParentParser.mPositionalArguments;
354
+ std::copy(std::begin(tPositionalArguments),
355
+ std::end(tPositionalArguments),
356
+ std::back_inserter(mPositionalArguments));
357
+
358
+ const auto &tOptionalArguments = tParentParser.mOptionalArguments;
359
+ std::copy(std::begin(tOptionalArguments), std::end(tOptionalArguments),
360
+ std::back_inserter(mOptionalArguments));
361
+
362
+ const auto &tArgumentMap = tParentParser.mArgumentMap;
363
+ for (const auto &[tKey, tValue] : tArgumentMap) {
364
+ mArgumentMap.insert_or_assign(tKey, tValue);
365
+ }
366
+ }
367
+ std::move(std::begin(tNewParentParsers), std::end(tNewParentParsers),
368
+ std::back_inserter(mParentParsers));
369
+ }
370
+
371
+ /* Call parse_args_internal - which does all the work
372
+ * Then, validate the parsed arguments
373
+ * This variant is used mainly for testing
374
+ * @throws std::runtime_error in case of any invalid argument
375
+ */
376
+ void parse_args(const std::vector<std::string> &aArguments) {
377
+ parse_args_internal(aArguments);
378
+ parse_args_validate();
379
+ }
380
+
381
+ /* Main entry point for parsing command-line arguments using this
382
+ * ArgumentParser
383
+ * @throws std::runtime_error in case of any invalid argument
384
+ */
385
+ void parse_args(int argc, const char *const argv[]) {
386
+ std::vector<std::string> arguments;
387
+ std::copy(argv, argv + argc, std::back_inserter(arguments));
388
+ parse_args(arguments);
389
+ }
390
+
391
+ /* Getter enabled for all template types other than std::vector and std::list
392
+ * @throws std::logic_error in case of an invalid argument name
393
+ * @throws std::logic_error in case of incompatible types
394
+ */
395
+ template <typename T = std::string> T get(const std::string &aArgumentName) {
396
+ auto tIterator = mArgumentMap.find(aArgumentName);
397
+ if (tIterator != mArgumentMap.end()) {
398
+ return tIterator->second->get<T>();
399
+ }
400
+ throw std::logic_error("No such argument");
401
+ }
402
+
403
+ /* Indexing operator. Return a reference to an Argument object
404
+ * Used in conjuction with Argument.operator== e.g., parser["foo"] == true
405
+ * @throws std::logic_error in case of an invalid argument name
406
+ */
407
+ Argument &operator[](const std::string &aArgumentName) {
408
+ auto tIterator = mArgumentMap.find(aArgumentName);
409
+ if (tIterator != mArgumentMap.end()) {
410
+ return *(tIterator->second);
411
+ }
412
+ throw std::logic_error("No such argument");
413
+ }
414
+
415
+ // Printing the one and only help message
416
+ // I've stuck with a simple message format, nothing fancy.
417
+ // TODO: support user-defined help and usage messages for the ArgumentParser
418
+ std::string print_help() {
419
+ std::stringstream stream;
420
+ stream << std::left;
421
+ stream << "Usage: ./" << mProgramName << " [options] ";
422
+ size_t tLongestArgumentLength = get_length_of_longest_argument();
423
+
424
+ for (const auto &argument : mPositionalArguments) {
425
+ stream << argument->mNames.front() << " ";
426
+ }
427
+ stream << "\n\n";
428
+
429
+ if (!mPositionalArguments.empty())
430
+ stream << "Positional arguments:\n";
431
+
432
+ for (const auto &mPositionalArgument : mPositionalArguments) {
433
+ stream.width(tLongestArgumentLength);
434
+ stream << *mPositionalArgument;
435
+ }
436
+
437
+ if (!mOptionalArguments.empty())
438
+ stream << (mPositionalArguments.empty() ? "" : "\n")
439
+ << "Options:\n";
440
+
441
+ for (const auto &mOptionalArgument : mOptionalArguments) {
442
+ stream.width(tLongestArgumentLength);
443
+ stream << *mOptionalArgument;
444
+ }
445
+
446
+ std::cout << stream.str();
447
+ return stream.str();
448
+ }
449
+
450
+ private:
451
+ /*
452
+ * @throws std::runtime_error in case of any invalid argument
453
+ */
454
+ void parse_args_internal(const std::vector<std::string> &aArguments) {
455
+ if (mProgramName.empty() && !aArguments.empty()) {
456
+ mProgramName = aArguments.front();
457
+ }
458
+ auto end = std::end(aArguments);
459
+ auto positionalArgumentIt = std::begin(mPositionalArguments);
460
+ for (auto it = std::next(std::begin(aArguments)); it != end;) {
461
+ const auto &tCurrentArgument = *it;
462
+ if (tCurrentArgument == Argument::mHelpOption ||
463
+ tCurrentArgument == Argument::mHelpOptionLong) {
464
+ throw std::runtime_error("help called");
465
+ }
466
+ if (Argument::is_positional(tCurrentArgument)) {
467
+ if (positionalArgumentIt == std::end(mPositionalArguments)) {
468
+ throw std::runtime_error(
469
+ "Maximum number of positional arguments exceeded");
470
+ }
471
+ auto tArgument = *(positionalArgumentIt++);
472
+ it = tArgument->consume(it, end);
473
+ } else if (auto tIterator = mArgumentMap.find(tCurrentArgument);
474
+ tIterator != mArgumentMap.end()) {
475
+ auto tArgument = tIterator->second;
476
+ it = tArgument->consume(std::next(it), end, tCurrentArgument);
477
+ } else if (const auto &tCompoundArgument = tCurrentArgument;
478
+ tCompoundArgument.size() > 1 && tCompoundArgument[0] == '-' &&
479
+ tCompoundArgument[1] != '-') {
480
+ ++it;
481
+ for (size_t j = 1; j < tCompoundArgument.size(); j++) {
482
+ auto tCurrentArgument = std::string{'-', tCompoundArgument[j]};
483
+ if (auto tIterator = mArgumentMap.find(tCurrentArgument);
484
+ tIterator != mArgumentMap.end()) {
485
+ auto tArgument = tIterator->second;
486
+ it = tArgument->consume(it, end, tCurrentArgument);
487
+ } else {
488
+ throw std::runtime_error("Unknown argument");
489
+ }
490
+ }
491
+ } else {
492
+ throw std::runtime_error("Unknown argument");
493
+ }
494
+ }
495
+ }
496
+
497
+ /*
498
+ * @throws std::runtime_error in case of any invalid argument
499
+ */
500
+ void parse_args_validate() {
501
+ // Check if all arguments are parsed
502
+ std::for_each(std::begin(mArgumentMap), std::end(mArgumentMap),
503
+ [](const auto &argPair) {
504
+ const auto &tArgument = argPair.second;
505
+ tArgument->validate();
506
+ });
507
+ }
508
+
509
+ // Used by print_help.
510
+ size_t get_length_of_longest_argument() {
511
+ if (mArgumentMap.empty())
512
+ return 0;
513
+ std::vector<size_t> argumentLengths(mArgumentMap.size());
514
+ std::transform(std::begin(mArgumentMap), std::end(mArgumentMap),
515
+ std::begin(argumentLengths), [](const auto &argPair) {
516
+ const auto &tArgument = argPair.second;
517
+ return tArgument->get_arguments_length();
518
+ });
519
+ return *std::max_element(std::begin(argumentLengths),
520
+ std::end(argumentLengths));
521
+ }
522
+
523
+ std::string mProgramName;
524
+ std::vector<ArgumentParser> mParentParsers;
525
+ std::vector<std::shared_ptr<Argument>> mPositionalArguments;
526
+ std::vector<std::shared_ptr<Argument>> mOptionalArguments;
527
+ std::map<std::string, std::shared_ptr<Argument>> mArgumentMap;
528
+ };
529
+
530
/*
 * Convenience wrapper around parser.parse_args(argc, argv).
 * On any std::runtime_error it prints the error text followed by the help
 * message and terminates the process.
 * NOTE: the exit status is 0 even for genuine parse errors; this is kept
 * unchanged for backward compatibility.
 * The macro parameters are parenthesized so that expressions such as *ptr can
 * be passed as the parser.
 */
#define PARSE_ARGS(parser, argc, argv)                                         \
  try {                                                                        \
    (parser).parse_args((argc), (argv));                                       \
  } catch (const std::runtime_error &err) {                                    \
    std::cout << err.what() << std::endl;                                      \
    (parser).print_help();                                                     \
    exit(0);                                                                   \
  }
538
+
539
+ } // namespace argparse